if not.
+ // if (c < ASIZE)
+ // j += bc[pattern[j+m-1]];
+ // else
+ // j += m
+ // #endif
+ // }
+ // return -1;
+ // }
+
+  // temp registers: t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result
+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
+
+ Register haystack_end = haystack_len;
+ Register skipch = tmp2;
+
+  // pattern length is >= 8, so we can read at least one full register when no
+  // encoding conversion is needed (8 chars for LL, 4 for UU) and half a register
+  // for the UL case. We'll re-read the last character in the inner pre-loop code
+  // so that the outer pre-loop needs only a single load
+ const int firstStep = isLL ? 7 : 3;
+
+ const int ASIZE = 256;
+  const int STORE_BYTES = 8; // 8 bytes stored per instruction (sd)
+
+ sub(sp, sp, ASIZE);
+
+ // init BC offset table with default value: needle_len
+ slli(t0, needle_len, 8);
+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len]
+ slli(tmp1, t0, 16);
+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len]
+ slli(tmp1, t0, 32);
+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len]
+
+ mv(ch1, sp); // ch1 is t0
+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations
+
+ bind(BM_INIT_LOOP);
+ // for (i = 0; i < ASIZE; ++i)
+ // bc[i] = m;
+ for (int i = 0; i < 4; i++) {
+ sd(tmp5, Address(ch1, i * wordSize));
+ }
+ add(ch1, ch1, 32);
+ sub(tmp6, tmp6, 4);
+ bgtz(tmp6, BM_INIT_LOOP);
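+  // the 256-byte bad-character table at sp now holds needle_len in every byte entry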
+
+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern
+ Register orig_haystack = tmp5;
+ mv(orig_haystack, haystack);
+ // result_tmp = tmp4
+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift);
+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1
+ mv(tmp3, needle);
+
+ // for (i = 0; i < m - 1; ) {
+ // c = pattern[i];
+ // ++i;
+ // // c < 256 for Latin1 string, so, no need for branch
+ // #ifdef PATTERN_STRING_IS_LATIN1
+ // bc[c] = m - i;
+ // #else
+ // if (c < ASIZE) bc[c] = m - i;
+ // #endif
+ // }
+ bind(BCLOOP);
+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg);
+ add(tmp3, tmp3, needle_chr_size);
+ if (!needle_isL) {
+ // ae == StrIntrinsicNode::UU
+ mv(tmp6, ASIZE);
+ bgeu(ch1, tmp6, BCSKIP);
+ }
+ add(tmp4, sp, ch1);
+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table
+
+ bind(BCSKIP);
+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1
+ bgtz(ch2, BCLOOP);
+
+ // tmp6: pattern end, address after needle
+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift);
+ if (needle_isL == haystack_isL) {
+ // load last 8 bytes (8LL/4UU symbols)
+ ld(tmp6, Address(tmp6, -wordSize));
+ } else {
+    // UL: search for a Latin1 pattern in a UTF-16 source
+    lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes (4 symbols)
+    // convert Latin1 to UTF-16, e.g. 0x0000abcd -> 0x0a0b0c0d
+    // We'll have to wait until the load completes, but it's still faster than per-character loads and checks
+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a
+ slli(ch2, tmp6, XLEN - 24);
+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b
+ slli(ch1, tmp6, XLEN - 16);
+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c
+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d
+ slli(ch2, ch2, 16);
+ orr(ch2, ch2, ch1); // 0x00000b0c
+ slli(result, tmp3, 48); // use result as temp register
+ orr(tmp6, tmp6, result); // 0x0a00000d
+ slli(result, ch2, 16);
+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d
+ }
+
+  //   i = m - 1;
+  //   skipch = src[j + i];
+  //   if (skipch == pattern[m - 1])
+  //     for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k);
+  //   else
+  //     move j using the bad-character offset table
+ bind(BMLOOPSTR2);
+ // compare pattern to source string backward
+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift);
+ (this->*haystack_load_1chr)(skipch, Address(result), noreg);
+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8
+ if (needle_isL == haystack_isL) {
+    // re-init tmp3. It's free because it executes in parallel with the
+    // load above. The alternative is to initialize it before the loop, but
+    // that would hurt performance on in-order systems with 2 or more ld/st pipelines
+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1]
+ }
+ if (!isLL) { // UU/UL case
+ slli(ch2, nlen_tmp, 1); // offsets in bytes
+ }
+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
+ add(result, haystack, isLL ? nlen_tmp : ch2);
+ ld(ch2, Address(result)); // load 8 bytes from source string
+ mv(ch1, tmp6);
+ if (isLL) {
+ j(BMLOOPSTR1_AFTER_LOAD);
+ } else {
+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
+ j(BMLOOPSTR1_CMP);
+ }
+
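+  // inner loop: compare needle and haystack one character at a time, walking backwards from the end of the needle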
+ bind(BMLOOPSTR1);
+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift);
+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg);
+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift);
+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
+
+ bind(BMLOOPSTR1_AFTER_LOAD);
+ sub(nlen_tmp, nlen_tmp, 1);
+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP);
+
+ bind(BMLOOPSTR1_CMP);
+ beq(ch1, ch2, BMLOOPSTR1);
+
+ bind(BMSKIP);
+ if (!isLL) {
+    // if we've met a UTF symbol (>= ASIZE) while searching for a Latin1 pattern, it cannot
+    // occur in the pattern, so we can skip needle_len symbols
+ if (needle_isL != haystack_isL) {
+ mv(result_tmp, needle_len);
+ } else {
+ mv(result_tmp, 1);
+ }
+ mv(t0, ASIZE);
+ bgeu(skipch, t0, BMADV);
+ }
+ add(result_tmp, sp, skipch);
+ lbu(result_tmp, Address(result_tmp)); // load skip offset
+
+ bind(BMADV);
+ sub(nlen_tmp, needle_len, 1);
+ // move haystack after bad char skip offset
+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift);
+ ble(haystack, haystack_end, BMLOOPSTR2);
+ add(sp, sp, ASIZE);
+ j(NOMATCH);
+
+ bind(BMLOOPSTR1_LASTCMP);
+ bne(ch1, ch2, BMSKIP);
+
+ bind(BMMATCH);
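+  // match found: return the match position in characters relative to the original haystack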
+ sub(result, haystack, orig_haystack);
+ if (!haystack_isL) {
+ srli(result, result, 1);
+ }
+ add(sp, sp, ASIZE);
+ j(DONE);
+
+ bind(LINEARSTUB);
+  sub(t0, needle_len, 16); // small patterns should still be handled by the simple algorithm
+ bltz(t0, LINEARSEARCH);
+ mv(result, zr);
+ RuntimeAddress stub = NULL;
+ if (isLL) {
+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll());
+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
+ } else if (needle_isL) {
+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul());
+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
+ } else {
+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu());
+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
+ }
+ trampoline_call(stub);
+ j(DONE);
+
+ bind(NOMATCH);
+ mv(result, -1);
+ j(DONE);
+
+ bind(LINEARSEARCH);
+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae);
+
+ bind(DONE);
+ BLOCK_COMMENT("} string_indexof");
+}
+
+// string_indexof
+// result: x10
+// src: x11
+// src_count: x12
+// pattern: x13
+// pattern_count: x14 or 1/2/3/4
+void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle,
+ Register haystack_len, Register needle_len,
+ Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4,
+ int needle_con_cnt, Register result, int ae)
+{
+ // Note:
+  // needle_con_cnt > 0 means the needle_len register is invalid and the needle length is constant
+  // for UU/LL: needle_con_cnt is in [1, 4]; for UL: needle_con_cnt == 1
+ assert(needle_con_cnt <= 4, "Invalid needle constant count");
+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+
+ Register ch1 = t0;
+ Register ch2 = t1;
+ Register hlen_neg = haystack_len, nlen_neg = needle_len;
+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4;
+
+ bool isLL = ae == StrIntrinsicNode::LL;
+
+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
+ int needle_chr_shift = needle_isL ? 0 : 1;
+ int haystack_chr_shift = haystack_isL ? 0 : 1;
+ int needle_chr_size = needle_isL ? 1 : 2;
+ int haystack_chr_size = haystack_isL ? 1 : 2;
+
+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu :
+ (load_chr_insn)&MacroAssembler::lhu;
+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu :
+ (load_chr_insn)&MacroAssembler::lhu;
+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu;
+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld;
+
+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE;
+
+ Register first = tmp3;
+
+ if (needle_con_cnt == -1) {
+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
+
+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2);
+ bltz(t0, DOSHORT);
+
+ (this->*needle_load_1chr)(first, Address(needle), noreg);
+ slli(t0, needle_len, needle_chr_shift);
+ add(needle, needle, t0);
+ neg(nlen_neg, t0);
+ slli(t0, result_tmp, haystack_chr_shift);
+ add(haystack, haystack, t0);
+ neg(hlen_neg, t0);
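+    // needle and haystack are now addressed with negative offsets that count up towards zero,
+    // so end-of-string checks are simple sign tests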
+
+ bind(FIRST_LOOP);
+ add(t0, haystack, hlen_neg);
+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg);
+ beq(first, ch2, STR1_LOOP);
+
+ bind(STR2_NEXT);
+ add(hlen_neg, hlen_neg, haystack_chr_size);
+ blez(hlen_neg, FIRST_LOOP);
+ j(NOMATCH);
+
+ bind(STR1_LOOP);
+ add(nlen_tmp, nlen_neg, needle_chr_size);
+ add(hlen_tmp, hlen_neg, haystack_chr_size);
+ bgez(nlen_tmp, MATCH);
+
+ bind(STR1_NEXT);
+ add(ch1, needle, nlen_tmp);
+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg);
+ add(ch2, haystack, hlen_tmp);
+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
+ bne(ch1, ch2, STR2_NEXT);
+ add(nlen_tmp, nlen_tmp, needle_chr_size);
+ add(hlen_tmp, hlen_tmp, haystack_chr_size);
+ bltz(nlen_tmp, STR1_NEXT);
+ j(MATCH);
+
+ bind(DOSHORT);
+ if (needle_isL == haystack_isL) {
+ sub(t0, needle_len, 2);
+ bltz(t0, DO1);
+ bgtz(t0, DO3);
+ }
+ }
+
+ if (needle_con_cnt == 4) {
+ Label CH1_LOOP;
+ (this->*load_4chr)(ch1, Address(needle), noreg);
+ sub(result_tmp, haystack_len, 4);
+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp
+ add(haystack, haystack, tmp3);
+ neg(hlen_neg, tmp3);
+
+ bind(CH1_LOOP);
+ add(ch2, haystack, hlen_neg);
+ (this->*load_4chr)(ch2, Address(ch2), noreg);
+ beq(ch1, ch2, MATCH);
+ add(hlen_neg, hlen_neg, haystack_chr_size);
+ blez(hlen_neg, CH1_LOOP);
+ j(NOMATCH);
+ }
+
+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) {
+ Label CH1_LOOP;
+ BLOCK_COMMENT("string_indexof DO2 {");
+ bind(DO2);
+ (this->*load_2chr)(ch1, Address(needle), noreg);
+ if (needle_con_cnt == 2) {
+ sub(result_tmp, haystack_len, 2);
+ }
+ slli(tmp3, result_tmp, haystack_chr_shift);
+ add(haystack, haystack, tmp3);
+ neg(hlen_neg, tmp3);
+
+ bind(CH1_LOOP);
+ add(tmp3, haystack, hlen_neg);
+ (this->*load_2chr)(ch2, Address(tmp3), noreg);
+ beq(ch1, ch2, MATCH);
+ add(hlen_neg, hlen_neg, haystack_chr_size);
+ blez(hlen_neg, CH1_LOOP);
+ j(NOMATCH);
+ BLOCK_COMMENT("} string_indexof DO2");
+ }
+
+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) {
+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
+ BLOCK_COMMENT("string_indexof DO3 {");
+
+ bind(DO3);
+ (this->*load_2chr)(first, Address(needle), noreg);
+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg);
+ if (needle_con_cnt == 3) {
+ sub(result_tmp, haystack_len, 3);
+ }
+ slli(hlen_tmp, result_tmp, haystack_chr_shift);
+ add(haystack, haystack, hlen_tmp);
+ neg(hlen_neg, hlen_tmp);
+
+ bind(FIRST_LOOP);
+ add(ch2, haystack, hlen_neg);
+ (this->*load_2chr)(ch2, Address(ch2), noreg);
+ beq(first, ch2, STR1_LOOP);
+
+ bind(STR2_NEXT);
+ add(hlen_neg, hlen_neg, haystack_chr_size);
+ blez(hlen_neg, FIRST_LOOP);
+ j(NOMATCH);
+
+ bind(STR1_LOOP);
+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size);
+ add(ch2, haystack, hlen_tmp);
+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
+ bne(ch1, ch2, STR2_NEXT);
+ j(MATCH);
+ BLOCK_COMMENT("} string_indexof DO3");
+ }
+
+ if (needle_con_cnt == -1 || needle_con_cnt == 1) {
+ Label DO1_LOOP;
+
+ BLOCK_COMMENT("string_indexof DO1 {");
+ bind(DO1);
+ (this->*needle_load_1chr)(ch1, Address(needle), noreg);
+ sub(result_tmp, haystack_len, 1);
+ mv(tmp3, result_tmp);
+ if (haystack_chr_shift) {
+ slli(tmp3, result_tmp, haystack_chr_shift);
+ }
+ add(haystack, haystack, tmp3);
+ neg(hlen_neg, tmp3);
+
+ bind(DO1_LOOP);
+ add(tmp3, haystack, hlen_neg);
+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg);
+ beq(ch1, ch2, MATCH);
+ add(hlen_neg, hlen_neg, haystack_chr_size);
+ blez(hlen_neg, DO1_LOOP);
+ BLOCK_COMMENT("} string_indexof DO1");
+ }
+
+ bind(NOMATCH);
+ mv(result, -1);
+ j(DONE);
+
+ bind(MATCH);
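+  // convert the negative haystack offset back to a character index and add it to the base position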
+ srai(t0, hlen_neg, haystack_chr_shift);
+ add(result, result_tmp, t0);
+
+ bind(DONE);
+}
+
+// Compare strings.
+void C2_MacroAssembler::string_compare(Register str1, Register str2,
+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
+ Register tmp3, int ae)
+{
+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
+ SHORT_LOOP_START, TAIL_CHECK, L;
+
+ const int STUB_THRESHOLD = 64 + 8;
+ bool isLL = ae == StrIntrinsicNode::LL;
+ bool isLU = ae == StrIntrinsicNode::LU;
+ bool isUL = ae == StrIntrinsicNode::UL;
+
+ bool str1_isL = isLL || isLU;
+ bool str2_isL = isLL || isUL;
+
+ // for L strings, 1 byte for 1 character
+ // for U strings, 2 bytes for 1 character
+ int str1_chr_size = str1_isL ? 1 : 2;
+ int str2_chr_size = str2_isL ? 1 : 2;
+ int minCharsInWord = isLL ? wordSize : wordSize / 2;
+
+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
+
+ BLOCK_COMMENT("string_compare {");
+
+  // Bizarrely, the counts are passed in bytes, regardless of whether they
+  // are L or U strings; the result, however, is always in characters.
+ if (!str1_isL) {
+ sraiw(cnt1, cnt1, 1);
+ }
+ if (!str2_isL) {
+ sraiw(cnt2, cnt2, 1);
+ }
+
+ // Compute the minimum of the string lengths and save the difference in result.
+ sub(result, cnt1, cnt2);
+ bgt(cnt1, cnt2, L);
+ mv(cnt2, cnt1);
+ bind(L);
+
+ // A very short string
+ li(t0, minCharsInWord);
+ ble(cnt2, t0, SHORT_STRING);
+
+ // Compare longwords
+ // load first parts of strings and finish initialization while loading
+ {
+ if (str1_isL == str2_isL) { // LL or UU
+ // load 8 bytes once to compare
+ ld(tmp1, Address(str1));
+ beq(str1, str2, DONE);
+ ld(tmp2, Address(str2));
+ li(t0, STUB_THRESHOLD);
+ bge(cnt2, t0, STUB);
+ sub(cnt2, cnt2, minCharsInWord);
+ beqz(cnt2, TAIL_CHECK);
+ // convert cnt2 from characters to bytes
+ if (!str1_isL) {
+ slli(cnt2, cnt2, 1);
+ }
+ add(str2, str2, cnt2);
+ add(str1, str1, cnt2);
+ sub(cnt2, zr, cnt2);
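+      // str1/str2 now point at the last full word of the compared region and cnt2 is a
+      // negative byte offset that counts up to zero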
+ } else if (isLU) { // LU case
+ lwu(tmp1, Address(str1));
+ ld(tmp2, Address(str2));
+ li(t0, STUB_THRESHOLD);
+ bge(cnt2, t0, STUB);
+ addi(cnt2, cnt2, -4);
+ add(str1, str1, cnt2);
+ sub(cnt1, zr, cnt2);
+ slli(cnt2, cnt2, 1);
+ add(str2, str2, cnt2);
+ inflate_lo32(tmp3, tmp1);
+ mv(tmp1, tmp3);
+ sub(cnt2, zr, cnt2);
+ addi(cnt1, cnt1, 4);
+ } else { // UL case
+ ld(tmp1, Address(str1));
+ lwu(tmp2, Address(str2));
+ li(t0, STUB_THRESHOLD);
+ bge(cnt2, t0, STUB);
+ addi(cnt2, cnt2, -4);
+ slli(t0, cnt2, 1);
+ sub(cnt1, zr, t0);
+ add(str1, str1, t0);
+ add(str2, str2, cnt2);
+ inflate_lo32(tmp3, tmp2);
+ mv(tmp2, tmp3);
+ sub(cnt2, zr, cnt2);
+ addi(cnt1, cnt1, 8);
+ }
+ addi(cnt2, cnt2, isUL ? 4 : 8);
+ bgez(cnt2, TAIL);
+ xorr(tmp3, tmp1, tmp2);
+ bnez(tmp3, DIFFERENCE);
+
+ // main loop
+ bind(NEXT_WORD);
+ if (str1_isL == str2_isL) { // LL or UU
+ add(t0, str1, cnt2);
+ ld(tmp1, Address(t0));
+ add(t0, str2, cnt2);
+ ld(tmp2, Address(t0));
+ addi(cnt2, cnt2, 8);
+ } else if (isLU) { // LU case
+ add(t0, str1, cnt1);
+ lwu(tmp1, Address(t0));
+ add(t0, str2, cnt2);
+ ld(tmp2, Address(t0));
+ addi(cnt1, cnt1, 4);
+ inflate_lo32(tmp3, tmp1);
+ mv(tmp1, tmp3);
+ addi(cnt2, cnt2, 8);
+ } else { // UL case
+ add(t0, str2, cnt2);
+ lwu(tmp2, Address(t0));
+ add(t0, str1, cnt1);
+ ld(tmp1, Address(t0));
+ inflate_lo32(tmp3, tmp2);
+ mv(tmp2, tmp3);
+ addi(cnt1, cnt1, 8);
+ addi(cnt2, cnt2, 4);
+ }
+ bgez(cnt2, TAIL);
+
+ xorr(tmp3, tmp1, tmp2);
+ beqz(tmp3, NEXT_WORD);
+ j(DIFFERENCE);
+ bind(TAIL);
+ xorr(tmp3, tmp1, tmp2);
+ bnez(tmp3, DIFFERENCE);
+ // Last longword. In the case where length == 4 we compare the
+ // same longword twice, but that's still faster than another
+ // conditional branch.
+ if (str1_isL == str2_isL) { // LL or UU
+ ld(tmp1, Address(str1));
+ ld(tmp2, Address(str2));
+ } else if (isLU) { // LU case
+ lwu(tmp1, Address(str1));
+ ld(tmp2, Address(str2));
+ inflate_lo32(tmp3, tmp1);
+ mv(tmp1, tmp3);
+ } else { // UL case
+ lwu(tmp2, Address(str2));
+ ld(tmp1, Address(str1));
+ inflate_lo32(tmp3, tmp2);
+ mv(tmp2, tmp3);
+ }
+ bind(TAIL_CHECK);
+ xorr(tmp3, tmp1, tmp2);
+ beqz(tmp3, DONE);
+
+ // Find the first different characters in the longwords and
+ // compute their difference.
+ bind(DIFFERENCE);
+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb
+ srl(tmp1, tmp1, result);
+ srl(tmp2, tmp2, result);
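+    // the differing character is now in the low bits of both words; mask it out and subtract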
+ if (isLL) {
+ andi(tmp1, tmp1, 0xFF);
+ andi(tmp2, tmp2, 0xFF);
+ } else {
+ andi(tmp1, tmp1, 0xFFFF);
+ andi(tmp2, tmp2, 0xFFFF);
+ }
+ sub(result, tmp1, tmp2);
+ j(DONE);
+ }
+
+ bind(STUB);
+ RuntimeAddress stub = NULL;
+ switch (ae) {
+ case StrIntrinsicNode::LL:
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL());
+ break;
+ case StrIntrinsicNode::UU:
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU());
+ break;
+ case StrIntrinsicNode::LU:
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU());
+ break;
+ case StrIntrinsicNode::UL:
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL());
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ assert(stub.target() != NULL, "compare_long_string stub has not been generated");
+ trampoline_call(stub);
+ j(DONE);
+
+ bind(SHORT_STRING);
+ // Is the minimum length zero?
+ beqz(cnt2, DONE);
+  // arrange the code to do most branches while loading, and to load the next
+  // characters while comparing the previous ones
+ (this->*str1_load_chr)(tmp1, Address(str1), t0);
+ addi(str1, str1, str1_chr_size);
+ addi(cnt2, cnt2, -1);
+ beqz(cnt2, SHORT_LAST_INIT);
+ (this->*str2_load_chr)(cnt1, Address(str2), t0);
+ addi(str2, str2, str2_chr_size);
+ j(SHORT_LOOP_START);
+ bind(SHORT_LOOP);
+ addi(cnt2, cnt2, -1);
+ beqz(cnt2, SHORT_LAST);
+ bind(SHORT_LOOP_START);
+ (this->*str1_load_chr)(tmp2, Address(str1), t0);
+ addi(str1, str1, str1_chr_size);
+ (this->*str2_load_chr)(t0, Address(str2), t0);
+ addi(str2, str2, str2_chr_size);
+ bne(tmp1, cnt1, SHORT_LOOP_TAIL);
+ addi(cnt2, cnt2, -1);
+ beqz(cnt2, SHORT_LAST2);
+ (this->*str1_load_chr)(tmp1, Address(str1), t0);
+ addi(str1, str1, str1_chr_size);
+ (this->*str2_load_chr)(cnt1, Address(str2), t0);
+ addi(str2, str2, str2_chr_size);
+ beq(tmp2, t0, SHORT_LOOP);
+ sub(result, tmp2, t0);
+ j(DONE);
+ bind(SHORT_LOOP_TAIL);
+ sub(result, tmp1, cnt1);
+ j(DONE);
+ bind(SHORT_LAST2);
+ beq(tmp2, t0, DONE);
+ sub(result, tmp2, t0);
+
+ j(DONE);
+ bind(SHORT_LAST_INIT);
+ (this->*str2_load_chr)(cnt1, Address(str2), t0);
+ addi(str2, str2, str2_chr_size);
+ bind(SHORT_LAST);
+ beq(tmp1, cnt1, DONE);
+ sub(result, tmp1, cnt1);
+
+ bind(DONE);
+
+ BLOCK_COMMENT("} string_compare");
+}
+
+void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
+ Register tmp4, Register tmp5, Register tmp6, Register result,
+ Register cnt1, int elem_size) {
+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR;
+ Register tmp1 = t0;
+ Register tmp2 = t1;
+ Register cnt2 = tmp2; // cnt2 only used in array length compare
+ Register elem_per_word = tmp6;
+ int log_elem_size = exact_log2(elem_size);
+ int length_offset = arrayOopDesc::length_offset_in_bytes();
+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+
+ assert(elem_size == 1 || elem_size == 2, "must be char or byte");
+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6);
+ li(elem_per_word, wordSize / elem_size);
+
+ BLOCK_COMMENT("arrays_equals {");
+
+ // if (a1 == a2), return true
+ beq(a1, a2, SAME);
+
+ mv(result, false);
+ beqz(a1, DONE);
+ beqz(a2, DONE);
+ lwu(cnt1, Address(a1, length_offset));
+ lwu(cnt2, Address(a2, length_offset));
+ bne(cnt2, cnt1, DONE);
+ beqz(cnt1, SAME);
+
+ slli(tmp5, cnt1, 3 + log_elem_size);
+ sub(tmp5, zr, tmp5);
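+  // tmp5 = -(array length in bits); used later as a shift amount to discard bits beyond the last element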
+ add(a1, a1, base_offset);
+ add(a2, a2, base_offset);
+ ld(tmp3, Address(a1, 0));
+ ld(tmp4, Address(a2, 0));
+ ble(cnt1, elem_per_word, SHORT); // short or same
+
+ // Main 16 byte comparison loop with 2 exits
+ bind(NEXT_DWORD); {
+ ld(tmp1, Address(a1, wordSize));
+ ld(tmp2, Address(a2, wordSize));
+ sub(cnt1, cnt1, 2 * wordSize / elem_size);
+ blez(cnt1, TAIL);
+ bne(tmp3, tmp4, DONE);
+ ld(tmp3, Address(a1, 2 * wordSize));
+ ld(tmp4, Address(a2, 2 * wordSize));
+ add(a1, a1, 2 * wordSize);
+ add(a2, a2, 2 * wordSize);
+ ble(cnt1, elem_per_word, TAIL2);
+ } beq(tmp1, tmp2, NEXT_DWORD);
+ j(DONE);
+
+ bind(TAIL);
+ xorr(tmp4, tmp3, tmp4);
+ xorr(tmp2, tmp1, tmp2);
+ sll(tmp2, tmp2, tmp5);
+ orr(tmp5, tmp4, tmp2);
+ j(IS_TMP5_ZR);
+
+ bind(TAIL2);
+ bne(tmp1, tmp2, DONE);
+
+ bind(SHORT);
+ xorr(tmp4, tmp3, tmp4);
+ sll(tmp5, tmp4, tmp5);
+
+ bind(IS_TMP5_ZR);
+ bnez(tmp5, DONE);
+
+ bind(SAME);
+ mv(result, true);
+ // That's it.
+ bind(DONE);
+
+ BLOCK_COMMENT("} array_equals");
+}
+
+// Compare Strings
+
+// For Strings we're passed the address of the first characters in a1
+// and a2 and the length in cnt1.
+// elem_size is the element size in bytes: either 1 or 2.
+// There are two implementations. For strings >= 8 bytes, all
+// comparisons (including the final one, which may overlap) are
+// performed 8 bytes at a time. For strings < 8 bytes, we compare a
+// word, then a halfword, and then a byte.
+
+void C2_MacroAssembler::string_equals(Register a1, Register a2,
+ Register result, Register cnt1, int elem_size)
+{
+ Label SAME, DONE, SHORT, NEXT_WORD;
+ Register tmp1 = t0;
+ Register tmp2 = t1;
+
+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte");
+ assert_different_registers(a1, a2, result, cnt1, t0, t1);
+
+ BLOCK_COMMENT("string_equals {");
+
+ mv(result, false);
+
+ // Check for short strings, i.e. smaller than wordSize.
+ sub(cnt1, cnt1, wordSize);
+ bltz(cnt1, SHORT);
+
+ // Main 8 byte comparison loop.
+ bind(NEXT_WORD); {
+ ld(tmp1, Address(a1, 0));
+ add(a1, a1, wordSize);
+ ld(tmp2, Address(a2, 0));
+ add(a2, a2, wordSize);
+ sub(cnt1, cnt1, wordSize);
+ bne(tmp1, tmp2, DONE);
+ } bgtz(cnt1, NEXT_WORD);
+
+ // Last longword. In the case where length == 4 we compare the
+ // same longword twice, but that's still faster than another
+ // conditional branch.
+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
+ // length == 4.
+ add(tmp1, a1, cnt1);
+ ld(tmp1, Address(tmp1, 0));
+ add(tmp2, a2, cnt1);
+ ld(tmp2, Address(tmp2, 0));
+ bne(tmp1, tmp2, DONE);
+ j(SAME);
+
+ bind(SHORT);
+ Label TAIL03, TAIL01;
+
+ // 0-7 bytes left.
+ andi(t0, cnt1, 4);
+ beqz(t0, TAIL03);
+ {
+ lwu(tmp1, Address(a1, 0));
+ add(a1, a1, 4);
+ lwu(tmp2, Address(a2, 0));
+ add(a2, a2, 4);
+ bne(tmp1, tmp2, DONE);
+ }
+
+ bind(TAIL03);
+ // 0-3 bytes left.
+ andi(t0, cnt1, 2);
+ beqz(t0, TAIL01);
+ {
+ lhu(tmp1, Address(a1, 0));
+ add(a1, a1, 2);
+ lhu(tmp2, Address(a2, 0));
+ add(a2, a2, 2);
+ bne(tmp1, tmp2, DONE);
+ }
+
+ bind(TAIL01);
+ if (elem_size == 1) { // Only needed when comparing 1-byte elements
+ // 0-1 bytes left.
+ andi(t0, cnt1, 1);
+ beqz(t0, SAME);
+ {
+ lbu(tmp1, a1, 0);
+ lbu(tmp2, a2, 0);
+ bne(tmp1, tmp2, DONE);
+ }
+ }
+
+ // Arrays are equal.
+ bind(SAME);
+ mv(result, true);
+
+ // That's it.
+ bind(DONE);
+ BLOCK_COMMENT("} string_equals");
+}
+
+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label,
+ bool is_far, bool is_unordered);
+
+static conditional_branch_insn conditional_branches[] =
+{
+ /* SHORT branches */
+ (conditional_branch_insn)&Assembler::beq,
+ (conditional_branch_insn)&Assembler::bgt,
+ NULL, // BoolTest::overflow
+ (conditional_branch_insn)&Assembler::blt,
+ (conditional_branch_insn)&Assembler::bne,
+ (conditional_branch_insn)&Assembler::ble,
+ NULL, // BoolTest::no_overflow
+ (conditional_branch_insn)&Assembler::bge,
+
+ /* UNSIGNED branches */
+ (conditional_branch_insn)&Assembler::beq,
+ (conditional_branch_insn)&Assembler::bgtu,
+ NULL,
+ (conditional_branch_insn)&Assembler::bltu,
+ (conditional_branch_insn)&Assembler::bne,
+ (conditional_branch_insn)&Assembler::bleu,
+ NULL,
+ (conditional_branch_insn)&Assembler::bgeu
+};
+
+static float_conditional_branch_insn float_conditional_branches[] =
+{
+ /* FLOAT SHORT branches */
+ (float_conditional_branch_insn)&MacroAssembler::float_beq,
+ (float_conditional_branch_insn)&MacroAssembler::float_bgt,
+ NULL, // BoolTest::overflow
+ (float_conditional_branch_insn)&MacroAssembler::float_blt,
+ (float_conditional_branch_insn)&MacroAssembler::float_bne,
+ (float_conditional_branch_insn)&MacroAssembler::float_ble,
+ NULL, // BoolTest::no_overflow
+ (float_conditional_branch_insn)&MacroAssembler::float_bge,
+
+ /* DOUBLE SHORT branches */
+ (float_conditional_branch_insn)&MacroAssembler::double_beq,
+ (float_conditional_branch_insn)&MacroAssembler::double_bgt,
+ NULL,
+ (float_conditional_branch_insn)&MacroAssembler::double_blt,
+ (float_conditional_branch_insn)&MacroAssembler::double_bne,
+ (float_conditional_branch_insn)&MacroAssembler::double_ble,
+ NULL,
+ (float_conditional_branch_insn)&MacroAssembler::double_bge
+};
+
+void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) {
+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])),
+ "invalid conditional branch index");
+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far);
+}
+
+// This function should only be used by C2. Flip the unordered bit when the condition is
+// unordered-greater: C2 uses unordered-lesser instead of unordered-greater, and the result
+// bits are commuted in do_one_bytecode().
+void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) {
+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])),
+ "invalid float conditional branch index");
+ int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask);
+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far,
+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true);
+}
+
+void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
+ switch (cmpFlag) {
+ case BoolTest::eq:
+ case BoolTest::le:
+ beqz(op1, L, is_far);
+ break;
+ case BoolTest::ne:
+ case BoolTest::gt:
+ bnez(op1, L, is_far);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
+ switch (cmpFlag) {
+ case BoolTest::eq:
+ beqz(op1, L, is_far);
+ break;
+ case BoolTest::ne:
+ bnez(op1, L, is_far);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) {
+ Label L;
+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L);
+ mv(dst, src);
+ bind(L);
+}
+
+// Set dst to NaN if any NaN input.
+void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2,
+ bool is_double, bool is_min) {
+ assert_different_registers(dst, src1, src2);
+
+ Label Done;
+ fsflags(zr);
+ if (is_double) {
+ is_min ? fmin_d(dst, src1, src2)
+ : fmax_d(dst, src1, src2);
+ // Checking NaNs
+ flt_d(zr, src1, src2);
+ } else {
+ is_min ? fmin_s(dst, src1, src2)
+ : fmax_s(dst, src1, src2);
+ // Checking NaNs
+ flt_s(zr, src1, src2);
+ }
+
+ frflags(t0);
+ beqz(t0, Done);
+
+ // In case of NaNs
+ is_double ? fadd_d(dst, src1, src2)
+ : fadd_s(dst, src1, src2);
+
+ bind(Done);
+}
+
+void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2,
+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) {
+ Label loop;
+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16;
+
+ bind(loop);
+ vsetvli(tmp1, cnt, sew, Assembler::m2);
+ vlex_v(vr1, a1, sew);
+ vlex_v(vr2, a2, sew);
+ vmsne_vv(vrs, vr1, vr2);
+ vfirst_m(tmp2, vrs);
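+  // tmp2 = index of the first mismatching element in this strip, or -1 if all elements matched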
+ bgez(tmp2, DONE);
+ sub(cnt, cnt, tmp1);
+ if (!islatin) {
+ slli(tmp1, tmp1, 1); // get byte counts
+ }
+ add(a1, a1, tmp1);
+ add(a2, a2, tmp1);
+ bnez(cnt, loop);
+
+ mv(result, true);
+}
+
+void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) {
+ Label DONE;
+ Register tmp1 = t0;
+ Register tmp2 = t1;
+
+ BLOCK_COMMENT("string_equals_v {");
+
+ mv(result, false);
+
+ if (elem_size == 2) {
+ srli(cnt, cnt, 1);
+ }
+
+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE);
+
+ bind(DONE);
+ BLOCK_COMMENT("} string_equals_v");
+}
+
+// used by C2 ClearArray patterns.
+// base: Address of a buffer to be zeroed
+// cnt: Count in HeapWords
+//
+// base, cnt, v0, v1 and t0 are clobbered.
+void C2_MacroAssembler::clear_array_v(Register base, Register cnt) {
+ Label loop;
+
+ // making zero words
+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
+ vxor_vv(v0, v0, v0);
+
+ bind(loop);
+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
+ vse64_v(v0, base);
+ sub(cnt, cnt, t0);
+ shadd(base, t0, base, t0, 3);
+ bnez(cnt, loop);
+}
+
+void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result,
+ Register cnt1, int elem_size) {
+ Label DONE;
+ Register tmp1 = t0;
+ Register tmp2 = t1;
+ Register cnt2 = tmp2;
+ int length_offset = arrayOopDesc::length_offset_in_bytes();
+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+
+ BLOCK_COMMENT("arrays_equals_v {");
+
+ // if (a1 == a2), return true
+ mv(result, true);
+ beq(a1, a2, DONE);
+
+ mv(result, false);
+ // if a1 == null or a2 == null, return false
+ beqz(a1, DONE);
+ beqz(a2, DONE);
+ // if (a1.length != a2.length), return false
+ lwu(cnt1, Address(a1, length_offset));
+ lwu(cnt2, Address(a2, length_offset));
+ bne(cnt1, cnt2, DONE);
+
+ la(a1, Address(a1, base_offset));
+ la(a2, Address(a2, base_offset));
+
+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE);
+
+ bind(DONE);
+
+ BLOCK_COMMENT("} arrays_equals_v");
+}
+
+void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2,
+ Register result, Register tmp1, Register tmp2, int encForm) {
+ Label DIFFERENCE, DONE, L, loop;
+ bool encLL = encForm == StrIntrinsicNode::LL;
+ bool encLU = encForm == StrIntrinsicNode::LU;
+ bool encUL = encForm == StrIntrinsicNode::UL;
+
+ bool str1_isL = encLL || encLU;
+ bool str2_isL = encLL || encUL;
+
+ int minCharsInWord = encLL ? wordSize : wordSize / 2;
+
+ BLOCK_COMMENT("string_compare {");
+
+  // for Latin1 strings, 1 byte for 1 character
+  // for UTF16 strings, 2 bytes for 1 character
+ if (!str1_isL)
+ sraiw(cnt1, cnt1, 1);
+ if (!str2_isL)
+ sraiw(cnt2, cnt2, 1);
+
+  // if the strings are equal up to the shorter length, the result is the length difference
+  // save the minimum of the string lengths in cnt2.
+ sub(result, cnt1, cnt2);
+ bgt(cnt1, cnt2, L);
+ mv(cnt2, cnt1);
+ bind(L);
+
+ if (str1_isL == str2_isL) { // LL or UU
+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE);
+ j(DONE);
+ } else { // LU or UL
+ Register strL = encLU ? str1 : str2;
+ Register strU = encLU ? str2 : str1;
+ VectorRegister vstr1 = encLU ? v4 : v0;
+ VectorRegister vstr2 = encLU ? v0 : v4;
+
+ bind(loop);
+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2);
+ vle8_v(vstr1, strL);
+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4);
+ vzext_vf2(vstr2, vstr1);
+ vle16_v(vstr1, strU);
+ vmsne_vv(v0, vstr2, vstr1);
+ vfirst_m(tmp2, v0);
+ bgez(tmp2, DIFFERENCE);
+ sub(cnt2, cnt2, tmp1);
+ add(strL, strL, tmp1);
+ shadd(strU, tmp1, strU, tmp1, 1);
+ bnez(cnt2, loop);
+ j(DONE);
+ }
+ bind(DIFFERENCE);
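+  // tmp2 holds the element index of the first mismatch: load both characters and return their difference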
+ slli(tmp1, tmp2, 1);
+ add(str1, str1, str1_isL ? tmp2 : tmp1);
+ add(str2, str2, str2_isL ? tmp2 : tmp1);
+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0));
+ str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0));
+ sub(result, tmp1, tmp2);
+
+ bind(DONE);
+}
+
+void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) {
+ Label loop;
+ assert_different_registers(src, dst, len, tmp, t0);
+
+ BLOCK_COMMENT("byte_array_inflate_v {");
+ bind(loop);
+ vsetvli(tmp, len, Assembler::e8, Assembler::m2);
+ vle8_v(v2, src);
+ vsetvli(t0, len, Assembler::e16, Assembler::m4);
+ vzext_vf2(v0, v2);
+ vse16_v(v0, dst);
+ sub(len, len, tmp);
+ add(src, src, tmp);
+ shadd(dst, tmp, dst, tmp, 1);
+ bnez(len, loop);
+ BLOCK_COMMENT("} byte_array_inflate_v");
+}
+
+// Compress char[] array to byte[].
+// result: the array length if every element in the array can be encoded; 0 otherwise.
+void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) {
+ Label done;
+ encode_iso_array_v(src, dst, len, result, tmp);
+ beqz(len, done);
+ mv(result, zr);
+ bind(done);
+}
+
+// result: the number of elements that have been encoded.
+void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) {
+ Label loop, DIFFERENCE, DONE;
+
+ BLOCK_COMMENT("encode_iso_array_v {");
+ mv(result, 0);
+
+ bind(loop);
+ mv(tmp, 0xff);
+ vsetvli(t0, len, Assembler::e16, Assembler::m2);
+ vle16_v(v2, src);
+ // if element > 0xff, stop
+ vmsgtu_vx(v1, v2, tmp);
+ vfirst_m(tmp, v1);
+ vmsbf_m(v0, v1);
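+  // v0 = mask of elements before the first char that does not fit in a byte; only those get stored below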
+ // compress char to byte
+ vsetvli(t0, len, Assembler::e8);
+ vncvt_x_x_w(v1, v2, Assembler::v0_t);
+ vse8_v(v1, dst, Assembler::v0_t);
+
+ bgez(tmp, DIFFERENCE);
+ add(result, result, t0);
+ add(dst, dst, t0);
+ sub(len, len, t0);
+ shadd(src, t0, src, t0, 1);
+ bnez(len, loop);
+ j(DONE);
+
+ bind(DIFFERENCE);
+ add(result, result, tmp);
+
+ bind(DONE);
+ BLOCK_COMMENT("} encode_iso_array_v");
+}
+
+void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) {
+ Label LOOP, SET_RESULT, DONE;
+
+ BLOCK_COMMENT("count_positives_v {");
+ mv(result, zr);
+
+ bind(LOOP);
+ vsetvli(t0, len, Assembler::e8, Assembler::m4);
+ vle8_v(v0, ary);
+ vmslt_vx(v0, v0, zr);
+ vfirst_m(tmp, v0);
+ bgez(tmp, SET_RESULT);
+ // if tmp == -1, all bytes are positive
+ add(result, result, t0);
+
+ sub(len, len, t0);
+ add(ary, ary, t0);
+ bnez(len, LOOP);
+ j(DONE);
+
+ // add remaining positive bytes count
+ bind(SET_RESULT);
+ add(result, result, tmp);
+
+ bind(DONE);
+ BLOCK_COMMENT("} count_positives_v");
+}
+
+void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1,
+ Register ch, Register result,
+ Register tmp1, Register tmp2,
+ bool isL) {
+ mv(result, zr);
+
+ Label loop, MATCH, DONE;
+ Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16;
+ bind(loop);
+ vsetvli(tmp1, cnt1, sew, Assembler::m4);
+ vlex_v(v0, str1, sew);
+ vmseq_vx(v0, v0, ch);
+ vfirst_m(tmp2, v0);
+ bgez(tmp2, MATCH); // if equal, return index
+
+ add(result, result, tmp1);
+ sub(cnt1, cnt1, tmp1);
+ if (!isL) slli(tmp1, tmp1, 1);
+ add(str1, str1, tmp1);
+ bnez(cnt1, loop);
+
+ mv(result, -1);
+ j(DONE);
+
+ bind(MATCH);
+ add(result, result, tmp2);
+
+ bind(DONE);
+}
+
+// Set dst to NaN if any NaN input.
+void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
+ bool is_double, bool is_min) {
+ assert_different_registers(dst, src1, src2);
+
+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32);
+
+ is_min ? vfmin_vv(dst, src1, src2)
+ : vfmax_vv(dst, src1, src2);
+
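+  // a NaN input compares unequal to itself; use that as a mask to force NaN into the affected lanes of dst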
+ vmfne_vv(v0, src1, src1);
+ vfadd_vv(dst, src1, src1, Assembler::v0_t);
+ vmfne_vv(v0, src2, src2);
+ vfadd_vv(dst, src2, src2, Assembler::v0_t);
+}
+
+// Set dst to NaN if any NaN input.
+void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst,
+ FloatRegister src1, VectorRegister src2,
+ VectorRegister tmp1, VectorRegister tmp2,
+ bool is_double, bool is_min) {
+ assert_different_registers(src2, tmp1, tmp2);
+
+ Label L_done, L_NaN;
+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32);
+ vfmv_s_f(tmp2, src1);
+
+ is_min ? vfredmin_vs(tmp1, src2, tmp2)
+ : vfredmax_vs(tmp1, src2, tmp2);
+
+ fsflags(zr);
+ // Checking NaNs
+ vmflt_vf(tmp2, src2, src1);
+ frflags(t0);
+ bnez(t0, L_NaN);
+ j(L_done);
+
+ bind(L_NaN);
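+  // at least one input is NaN; the unordered sum reduction below propagates NaN into the result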
+ vfmv_s_f(tmp2, src1);
+ vfredsum_vs(tmp1, src2, tmp2);
+
+ bind(L_done);
+ vfmv_f_s(dst, tmp1);
+}
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c71df4c101b664b129c1d6278788d3b1059cfbb6
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
+#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+
+ private:
+ void element_compare(Register r1, Register r2,
+ Register result, Register cnt,
+ Register tmp1, Register tmp2,
+ VectorRegister vr1, VectorRegister vr2,
+ VectorRegister vrs,
+ bool is_latin, Label& DONE);
+ public:
+
+ void string_compare(Register str1, Register str2,
+ Register cnt1, Register cnt2, Register result,
+ Register tmp1, Register tmp2, Register tmp3,
+ int ae);
+
+ void string_indexof_char_short(Register str1, Register cnt1,
+ Register ch, Register result,
+ bool isL);
+
+ void string_indexof_char(Register str1, Register cnt1,
+ Register ch, Register result,
+ Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4,
+ bool isL);
+
+ void string_indexof(Register str1, Register str2,
+ Register cnt1, Register cnt2,
+ Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4,
+ Register tmp5, Register tmp6,
+ Register result, int ae);
+
+ void string_indexof_linearscan(Register haystack, Register needle,
+ Register haystack_len, Register needle_len,
+ Register tmp1, Register tmp2,
+ Register tmp3, Register tmp4,
+ int needle_con_cnt, Register result, int ae);
+
+ void arrays_equals(Register r1, Register r2,
+ Register tmp3, Register tmp4,
+ Register tmp5, Register tmp6,
+ Register result, Register cnt1,
+ int elem_size);
+
+ void string_equals(Register r1, Register r2,
+ Register result, Register cnt1,
+ int elem_size);
+
+ // refer to conditional_branches and float_conditional_branches
+ static const int bool_test_bits = 3;
+ static const int neg_cond_bits = 2;
+ static const int unsigned_branch_mask = 1 << bool_test_bits;
+ static const int double_branch_mask = 1 << bool_test_bits;
+
+ // cmp
+ void cmp_branch(int cmpFlag,
+ Register op1, Register op2,
+ Label& label, bool is_far = false);
+
+ void float_cmp_branch(int cmpFlag,
+ FloatRegister op1, FloatRegister op2,
+ Label& label, bool is_far = false);
+
+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op,
+ Label& L, bool is_far = false);
+
+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op,
+ Label& L, bool is_far = false);
+
+ void enc_cmove(int cmpFlag,
+ Register op1, Register op2,
+ Register dst, Register src);
+
+ void spill(Register r, bool is64, int offset) {
+ is64 ? sd(r, Address(sp, offset))
+ : sw(r, Address(sp, offset));
+ }
+
+ void spill(FloatRegister f, bool is64, int offset) {
+ is64 ? fsd(f, Address(sp, offset))
+ : fsw(f, Address(sp, offset));
+ }
+
+ void spill(VectorRegister v, int offset) {
+ add(t0, sp, offset);
+ vs1r_v(v, t0);
+ }
+
+ void unspill(Register r, bool is64, int offset) {
+ is64 ? ld(r, Address(sp, offset))
+ : lw(r, Address(sp, offset));
+ }
+
+ void unspillu(Register r, bool is64, int offset) {
+ is64 ? ld(r, Address(sp, offset))
+ : lwu(r, Address(sp, offset));
+ }
+
+ void unspill(FloatRegister f, bool is64, int offset) {
+ is64 ? fld(f, Address(sp, offset))
+ : flw(f, Address(sp, offset));
+ }
+
+ void unspill(VectorRegister v, int offset) {
+ add(t0, sp, offset);
+ vl1r_v(v, t0);
+ }
+
+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) {
+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size");
+ unspill(v0, src_offset);
+ spill(v0, dst_offset);
+ }
+
+ void minmax_FD(FloatRegister dst,
+ FloatRegister src1, FloatRegister src2,
+ bool is_double, bool is_min);
+
+ // intrinsic methods implemented by rvv instructions
+ void string_equals_v(Register r1, Register r2,
+ Register result, Register cnt1,
+ int elem_size);
+
+ void arrays_equals_v(Register r1, Register r2,
+ Register result, Register cnt1,
+ int elem_size);
+
+ void string_compare_v(Register str1, Register str2,
+ Register cnt1, Register cnt2,
+ Register result,
+ Register tmp1, Register tmp2,
+ int encForm);
+
+ void clear_array_v(Register base, Register cnt);
+
+ void byte_array_inflate_v(Register src, Register dst,
+ Register len, Register tmp);
+
+ void char_array_compress_v(Register src, Register dst,
+ Register len, Register result,
+ Register tmp);
+
+ void encode_iso_array_v(Register src, Register dst,
+ Register len, Register result,
+ Register tmp);
+
+ void count_positives_v(Register ary, Register len,
+ Register result, Register tmp);
+
+ void string_indexof_char_v(Register str1, Register cnt1,
+ Register ch, Register result,
+ Register tmp1, Register tmp2,
+ bool isL);
+
+ void minmax_FD_v(VectorRegister dst,
+ VectorRegister src1, VectorRegister src2,
+ bool is_double, bool is_min);
+
+ void reduce_minmax_FD_v(FloatRegister dst,
+ FloatRegister src1, VectorRegister src2,
+ VectorRegister tmp1, VectorRegister tmp2,
+ bool is_double, bool is_min);
+
+#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..53a41665f4b8e5b17442c99f51b2cb66b2b9d6fc
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP
+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the server compiler.
+// (see c2_globals.hpp). Alpha-sorted.
+
+define_pd_global(bool, BackgroundCompilation, true);
+define_pd_global(bool, CICompileOSR, true);
+define_pd_global(bool, InlineIntrinsics, true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps, true);
+define_pd_global(bool, UseOnStackReplacement, true);
+define_pd_global(bool, ProfileInterpreter, true);
+define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false));
+define_pd_global(intx, CompileThreshold, 10000);
+
+define_pd_global(intx, OnStackReplacePercentage, 140);
+define_pd_global(intx, ConditionalMoveLimit, 0);
+define_pd_global(intx, FreqInlineSize, 325);
+define_pd_global(intx, MinJumpTableSize, 10);
+define_pd_global(intx, InteriorEntryAlignment, 16);
+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+define_pd_global(intx, LoopUnrollLimit, 60);
+define_pd_global(intx, LoopPercentProfileLimit, 10);
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize
+define_pd_global(intx, CodeCacheExpansionSize, 64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t,MaxRAM, 128ULL*G);
+define_pd_global(intx, RegisterCostAreaRatio, 16000);
+
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole, false);
+define_pd_global(bool, UseCISCSpill, false);
+define_pd_global(bool, OptoScheduling, true);
+define_pd_global(bool, OptoBundling, false);
+define_pd_global(bool, OptoRegScheduling, false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true);
+define_pd_global(bool, IdealizeClearArrayNode, true);
+
+define_pd_global(intx, ReservedCodeCacheSize, 48*M);
+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);
+define_pd_global(intx, ProfiledCodeHeapSize, 22*M);
+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M );
+define_pd_global(uintx, CodeCacheMinBlockLength, 6);
+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed.
+
+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP
diff --git a/src/java.base/windows/native/libnet/InetAddressImplFactory.c b/src/hotspot/cpu/riscv/c2_init_riscv.cpp
similarity index 58%
rename from src/java.base/windows/native/libnet/InetAddressImplFactory.c
rename to src/hotspot/cpu/riscv/c2_init_riscv.cpp
index 9d76762ca6cfeeb5ce78c5064e53c67924b93ffa..cdbd69807bee18aeedebb24a29cdae2126b8b4b5 100644
--- a/src/java.base/windows/native/libnet/InetAddressImplFactory.c
+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp
@@ -1,12 +1,12 @@
/*
- * Copyright (c) 1997, 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
+ * published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
@@ -21,26 +21,18 @@
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
+ *
*/
-#include "java_net_InetAddressImplFactory.h"
-#include "net_util.h"
-/*
- * InetAddressImplFactory
- */
+#include "precompiled.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+// processor dependent initialization for riscv
-/*
- * Class: java_net_InetAddressImplFactory
- * Method: isIPv6Supported
- * Signature: ()Z
- */
-JNIEXPORT jboolean JNICALL
-Java_java_net_InetAddressImplFactory_isIPv6Supported(JNIEnv *env, jobject this)
-{
- if (ipv6_available()) {
- return JNI_TRUE;
- } else {
- return JNI_FALSE;
- }
+extern void reg_mask_init();
+
+void Compile::pd_compiler2_init() {
+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
+ reg_mask_init();
}
diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a90d9fdc160a5e293bda77977adaf79fc0a680e6
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ masm.
+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const {
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+ RuntimeAddress callback_addr(stub);
+
+ __ bind(entry->_stub_label);
+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);
+ masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec());
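+  // store the pc of the safepoint poll into the thread, then jump to the shared polling-page return handler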
+ __ la(t0, safepoint_pc.target());
+ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset()));
+ __ far_jump(callback_addr);
+}
+#undef __
diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..14a68b45026da4349ecaea744c6b0b83f456a71f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP
+#define CPU_RISCV_CODEBUFFER_RISCV_HPP
+
+private:
+ void pd_initialize() {}
+
+public:
+ void flush_bundle(bool start_new_bundle) {}
+
+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..75bc4be7840991d5896edf041d1ab44d05a0210c
--- /dev/null
+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+
+// ----------------------------------------------------------------------------
+
+#define __ _masm.
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+ precond(cbuf.stubs()->start() != badAddress);
+ precond(cbuf.stubs()->end() != badAddress);
+ // Stub is fixed up when the corresponding call is converted from
+ // calling compiled code to calling interpreted code.
+ // mv xmethod, 0
+ // jalr -4 # to self
+
+ if (mark == NULL) {
+ mark = cbuf.insts_mark(); // Get mark within main instrs section.
+ }
+
+ // Note that the code buffer's insts_mark is always relative to insts.
+ // That's why we must use the macroassembler to generate a stub.
+ MacroAssembler _masm(&cbuf);
+
+ address base = __ start_a_stub(to_interp_stub_size());
+ int offset = __ offset();
+ if (base == NULL) {
+ return NULL; // CodeBuffer::expand failed
+ }
+ // static stub relocation stores the instruction address of the call
+ __ relocate(static_stub_Relocation::spec(mark));
+
+ __ emit_static_call_stub();
+
+ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big");
+ __ end_a_stub();
+ return base;
+}
+#undef __
+
+int CompiledStaticCall::to_interp_stub_size() {
+ // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
+ return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size;
+}
+
+int CompiledStaticCall::to_trampoline_stub_size() {
+ // Somewhat pessimistically, we count 4 instructions here (although
+ // there are only 3) because we sometimes emit an alignment nop.
+ // Trampoline stubs are always word aligned.
+ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size;
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+int CompiledStaticCall::reloc_to_interp_stub() {
+ return 4; // 3 in emit_to_interp_stub + 1 in emit_call
+}
+
+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) {
+ address stub = find_stub();
+ guarantee(stub != NULL, "stub not found");
+
+ if (TraceICs) {
+ ResourceMark rm;
+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+ p2i(instruction_address()),
+ callee->name_and_sig_as_C_string());
+ }
+
+ // Creation also verifies the object.
+ NativeMovConstReg* method_holder
+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size());
+#ifdef ASSERT
+ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address());
+
+ verify_mt_safe(callee, entry, method_holder, jump);
+#endif
+ // Update stub.
+ method_holder->set_data((intptr_t)callee());
+ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
+ ICache::invalidate_range(stub, to_interp_stub_size());
+ // Update jump to call.
+ set_destination_mt_safe(stub);
+}
+
+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
+ // Reset stub.
+ address stub = static_stub->addr();
+ assert(stub != NULL, "stub not found");
+ assert(CompiledICLocker::is_safe(stub), "mt unsafe call");
+ // Creation also verifies the object.
+ NativeMovConstReg* method_holder
+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size());
+ method_holder->set_data(0);
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+ jump->set_jump_destination((address)-1);
+}
+
+//-----------------------------------------------------------------------------
+// Non-product mode code
+#ifndef PRODUCT
+
+void CompiledDirectStaticCall::verify() {
+ // Verify call.
+ _call->verify();
+ _call->verify_alignment();
+
+ // Verify stub.
+ address stub = find_stub();
+ assert(stub != NULL, "no stub found for static call");
+ // Creation also verifies the object.
+ NativeMovConstReg* method_holder
+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size());
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+
+ // Verify state.
+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
+}
+
+#endif // !PRODUCT
diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bceadcc5dcc1cbc03f97541c0a0d2b652eac5070
--- /dev/null
+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_COPY_RISCV_HPP
+#define CPU_RISCV_COPY_RISCV_HPP
+
+#include OS_CPU_HEADER(copy)
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+ julong* to = (julong*) tohw;
+ julong v = ((julong) value << 32) | value;
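+  // e.g. value == 0x12345678 gives v == 0x1234567812345678, so each 64-bit
+  // store below fills two adjacent 32-bit slots with the fill value.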
+ while (count-- > 0) {
+ *to++ = v;
+ }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+ pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+ (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+ pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+ (void)memset(to, 0, count);
+}
+
+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
+ (void)memmove(to, from, count * HeapWordSize);
+}
+
+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
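+  // Counts of up to 8 words are copied with an unrolled fall-through chain
+  // (no loop); anything larger defers to memcpy.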
+ switch (count) {
+ case 8: to[7] = from[7]; // fall through
+ case 7: to[6] = from[6]; // fall through
+ case 6: to[5] = from[5]; // fall through
+ case 5: to[4] = from[4]; // fall through
+ case 4: to[3] = from[3]; // fall through
+ case 3: to[2] = from[2]; // fall through
+ case 2: to[1] = from[1]; // fall through
+ case 1: to[0] = from[0]; // fall through
+ case 0: break;
+ default:
+ memcpy(to, from, count * HeapWordSize);
+ break;
+ }
+}
+
+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
+ shared_disjoint_words_atomic(from, to, count);
+}
+
+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
+ pd_conjoint_words(from, to, count);
+}
+
+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
+ pd_disjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
+ (void)memmove(to, from, count);
+}
+
+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
+ pd_conjoint_bytes(from, to, count);
+}
+
+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
+ _Copy_conjoint_jshorts_atomic(from, to, count);
+}
+
+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
+ _Copy_conjoint_jints_atomic(from, to, count);
+}
+
+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
+ _Copy_conjoint_jlongs_atomic(from, to, count);
+}
+
+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size.");
+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
+}
+
+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
+ _Copy_arrayof_conjoint_bytes(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
+ _Copy_arrayof_conjoint_jshorts(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
+ _Copy_arrayof_conjoint_jints(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
+ _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
+ assert(!UseCompressedOops, "foo!");
+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
+ _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+#endif // CPU_RISCV_COPY_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b0e5560c9064f502d8e4a8760d88f119ee4520fb
--- /dev/null
+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP
+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP
+
+static int pd_instruction_alignment() {
+ return 1;
+}
+
+static const char* pd_cpu_opts() {
+ return "";
+}
+
+// Returns address of n-th instruction preceding addr,
+// NULL if no preceding instruction can be found.
+// On riscv, we assume a constant instruction length.
+// It might be beneficial to check "is_readable" as we do on ppc and s390.
+static address find_prev_instr(address addr, int n_instr) {
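+  // Assuming only fixed-size (4-byte) encodings are emitted, e.g.
+  // find_prev_instr(addr, 1) is simply addr - Assembler::instruction_size.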
+ return addr - Assembler::instruction_size * n_instr;
+}
+
+// special-case instruction decoding.
+// There may be cases where the binutils disassembler doesn't do
+// a perfect job. In those cases, decode_instruction0 may kick in
+// and do it right.
+// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)"
+static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) {
+ return here;
+}
+
+// platform-specific instruction annotations (like value of loaded constants)
+static void annotate(address pc, outputStream* st) {}
+
+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP
diff --git a/test/jdk/sun/net/www/httptest/HttpCallback.java b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp
similarity index 57%
rename from test/jdk/sun/net/www/httptest/HttpCallback.java
rename to src/hotspot/cpu/riscv/foreign_globals_riscv.cpp
index 27af7b7aaf4ebbbe7cfec599d20305ec526db8ed..5c700be9c91cc1f9599d70c8c04cc117e7129c77 100644
--- a/test/jdk/sun/net/www/httptest/HttpCallback.java
+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -19,21 +20,25 @@
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
+ *
*/
-/**
- * This interface is implemented by classes that wish to handle incoming HTTP
- * requests and generate responses. This could be a general purpose HTTP server
- * or a test case that expects specific requests from a client.
- *
- * The incoming request fields can be examined via the {@link HttpTransaction}
- * object, and a response can also be generated and sent via the request object.
- */
-public interface HttpCallback {
- /**
- * handle the given request and generate an appropriate response.
- * @param msg the transaction containing the request from the
- * client and used to send the response
- */
- void request (HttpTransaction msg);
+#include "precompiled.hpp"
+#include "prims/foreign_globals.hpp"
+#include "utilities/debug.hpp"
+
+// Stubbed out, implement later
+const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const {
+ Unimplemented();
+ return {};
+}
+
+const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const {
+ Unimplemented();
+ return {};
+}
+
+const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const {
+ ShouldNotCallThis();
+ return {};
}
diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ac89752c27eeeac527737d30b2140fcfbe8ed7e
--- /dev/null
+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP
+#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP
+
+class ABIDescriptor {};
+class BufferLayout {};
+
+#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6e38960598a7a620828058a59ca6d15b78c403de
--- /dev/null
+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp
@@ -0,0 +1,697 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "compiler/oopMap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
+#include "oops/markWord.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/os.inline.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stackWatermarkSet.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_riscv.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() {
+}
+#endif
+
+
+// Profiling/safepoint support
+
+bool frame::safe_for_sender(JavaThread *thread) {
+ address addr_sp = (address)_sp;
+ address addr_fp = (address)_fp;
+ address unextended_sp = (address)_unextended_sp;
+
+ // consider stack guards when trying to determine "safe" stack pointers
+ // sp must be within the usable part of the stack (not in guards)
+ if (!thread->is_in_usable_stack(addr_sp)) {
+ return false;
+ }
+
+ // When we are running interpreted code the machine stack pointer, SP, is
+ // set low enough so that the Java expression stack can grow and shrink
+ // without ever exceeding the machine stack bounds. So, ESP >= SP.
+
+ // When we call out of an interpreted method, SP is incremented so that
+ // the space between SP and ESP is removed. The SP saved in the callee's
+ // frame is the SP *before* this increment. So, when we walk a stack of
+ // interpreter frames the sender's SP saved in a frame might be less than
+ // the SP at the point of call.
+
+  // So the unextended sp must be within the stack, but we need not check
+  // that unextended sp >= sp.
+
+ if (!thread->is_in_full_stack_checked(unextended_sp)) {
+ return false;
+ }
+
+  // an fp must be within the stack and above (but not equal to) sp
+  // the second evaluation, on fp plus the return address offset, handles the situation where fp is -1
+ bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) &&
+ thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*)));
+
+  // We know sp/unextended_sp are safe; only fp is questionable here
+
+  // If the current frame is known to the code cache then we can attempt
+  // to construct the sender and do some validation of it. This goes a long way
+  // toward eliminating issues when we get into frame construction code.
+
+ if (_cb != NULL) {
+
+    // First check if the frame is complete and the tester is reliable.
+    // Unfortunately we can only check frame completeness for runtime stubs and nmethods;
+    // other generic buffer blobs are more problematic, so we just assume they are
+    // OK. Adapter blobs never have a complete frame and are never OK.
+
+ if (!_cb->is_frame_complete_at(_pc)) {
+ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
+ return false;
+ }
+ }
+
+ // Could just be some random pointer within the codeBlob
+ if (!_cb->code_contains(_pc)) {
+ return false;
+ }
+
+ // Entry frame checks
+ if (is_entry_frame()) {
+ // an entry frame must have a valid fp.
+ return fp_safe && is_entry_frame_valid(thread);
+ }
+
+ intptr_t* sender_sp = NULL;
+ intptr_t* sender_unextended_sp = NULL;
+ address sender_pc = NULL;
+ intptr_t* saved_fp = NULL;
+
+ if (is_interpreted_frame()) {
+ // fp must be safe
+ if (!fp_safe) {
+ return false;
+ }
+
+ sender_pc = (address)this->fp()[return_addr_offset];
+ // for interpreted frames, the value below is the sender "raw" sp,
+ // which can be different from the sender unextended sp (the sp seen
+ // by the sender) because of current frame local variables
+ sender_sp = (intptr_t*) addr_at(sender_sp_offset);
+ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset];
+ saved_fp = (intptr_t*) this->fp()[link_offset];
+ } else {
+ // must be some sort of compiled/runtime frame
+      // fp does not have to be safe (although it could be checked for C1?)
+
+ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc
+ if (_cb->frame_size() <= 0) {
+ return false;
+ }
+
+ sender_sp = _unextended_sp + _cb->frame_size();
+ // Is sender_sp safe?
+ if (!thread->is_in_full_stack_checked((address)sender_sp)) {
+ return false;
+ }
+
+ sender_unextended_sp = sender_sp;
+ sender_pc = (address) *(sender_sp - 1);
+ saved_fp = (intptr_t*) *(sender_sp - 2);
+ }
+
+
+ // If the potential sender is the interpreter then we can do some more checking
+ if (Interpreter::contains(sender_pc)) {
+
+ // fp is always saved in a recognizable place in any code we generate. However
+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp
+ // is really a frame pointer.
+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) {
+ return false;
+ }
+
+ // construct the potential sender
+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
+
+ return sender.is_interpreted_frame_valid(thread);
+ }
+
+ // We must always be able to find a recognizable pc
+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
+ if (sender_pc == NULL || sender_blob == NULL) {
+ return false;
+ }
+
+ // Could be a zombie method
+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) {
+ return false;
+ }
+
+ // Could just be some random pointer within the codeBlob
+ if (!sender_blob->code_contains(sender_pc)) {
+ return false;
+ }
+
+ // We should never be able to see an adapter if the current frame is something from code cache
+ if (sender_blob->is_adapter_blob()) {
+ return false;
+ }
+
+ // Could be the call_stub
+ if (StubRoutines::returns_to_call_stub(sender_pc)) {
+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) {
+ return false;
+ }
+
+ // construct the potential sender
+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
+
+ // Validate the JavaCallWrapper an entry frame must have
+ address jcw = (address)sender.entry_frame_call_wrapper();
+
+ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp());
+
+ return jcw_safe;
+ }
+
+ CompiledMethod* nm = sender_blob->as_compiled_method_or_null();
+ if (nm != NULL) {
+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+ nm->method()->is_method_handle_intrinsic()) {
+ return false;
+ }
+ }
+
+    // If the frame size is 0 (or less) something is bad, because every nmethod has a non-zero
+    // frame size: the return address alone counts against the callee's frame.
+ if (sender_blob->frame_size() <= 0) {
+ assert(!sender_blob->is_compiled(), "should count return address at least");
+ return false;
+ }
+
+ // We should never be able to see anything here except an nmethod. If something in the
+ // code cache (current frame) is called by an entity within the code cache that entity
+ // should not be anything but the call stub (already covered), the interpreter (already covered)
+ // or an nmethod.
+ if (!sender_blob->is_compiled()) {
+ return false;
+ }
+
+    // We could add more validation of the potential non-interpreted sender frame we'd
+    // create by calling sender(), if we could think of any. Wait for the next crash in forte...
+
+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
+
+ // We've validated the potential sender that would be created
+ return true;
+ }
+
+ // Must be native-compiled frame. Since sender will try and use fp to find
+ // linkages it must be safe
+ if (!fp_safe) {
+ return false;
+ }
+
+  // The pc we fetch must be non-zero (a zero return address marks the oldest frame).
+ if ((address)this->fp()[return_addr_offset] == NULL) { return false; }
+
+ return true;
+}
+
+void frame::patch_pc(Thread* thread, address pc) {
+ assert(_cb == CodeCache::find_blob(pc), "unexpected pc");
+ address* pc_addr = &(((address*) sp())[-1]);
+ if (TracePcPatching) {
+ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
+ p2i(pc_addr), p2i(*pc_addr), p2i(pc));
+ }
+ // Either the return address is the original one or we are going to
+ // patch in the same address that's already there.
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be");
+ *pc_addr = pc;
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
+ if (original_pc != NULL) {
+ assert(original_pc == _pc, "expected original PC to be stored before patching");
+ _deopt_state = is_deoptimized;
+ // leave _pc as is
+ } else {
+ _deopt_state = not_deoptimized;
+ _pc = pc;
+ }
+}
+
+bool frame::is_interpreted_frame() const {
+ return Interpreter::contains(pc());
+}
+
+int frame::frame_size(RegisterMap* map) const {
+ frame sender = this->sender(map);
+ return sender.sp() - sp();
+}
+
+intptr_t* frame::entry_frame_argument_at(int offset) const {
+ // convert offset to index to deal with tsi
+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+ // Entry frame's arguments are always in relation to unextended_sp()
+ return &unextended_sp()[index];
+}
+
+// sender_sp
+intptr_t* frame::interpreter_frame_sender_sp() const {
+ assert(is_interpreted_frame(), "interpreted frame expected");
+ return (intptr_t*) at(interpreter_frame_sender_sp_offset);
+}
+
+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
+ assert(is_interpreted_frame(), "interpreted frame expected");
+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp);
+}
+
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset);
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);
+ // make sure the pointer points inside the frame
+ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer");
+ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer");
+ return result;
+}
+
+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;
+}
+
+// Used by template based interpreter deoptimization
+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) {
+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp;
+}
+
+frame frame::sender_for_entry_frame(RegisterMap* map) const {
+ assert(map != NULL, "map must be set");
+ // Java frame called from C; skip all C frames and return top C
+ // frame of that chunk as the sender
+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+ assert(!entry_frame_is_first(), "next Java fp must be non zero");
+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
+ // Since we are walking the stack now this nested anchor is obviously walkable
+ // even if it wasn't when it was stacked.
+ if (!jfa->walkable()) {
+ // Capture _last_Java_pc (if needed) and mark anchor walkable.
+ jfa->capture_last_Java_pc();
+ }
+ map->clear();
+ assert(map->include_argument_oops(), "should be set by clear");
+ vmassert(jfa->last_Java_pc() != NULL, "not walkable");
+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
+ return fr;
+}
+
+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
+ ShouldNotCallThis();
+ return nullptr;
+}
+
+bool frame::optimized_entry_frame_is_first() const {
+ ShouldNotCallThis();
+ return false;
+}
+
+frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const {
+ ShouldNotCallThis();
+ return {};
+}
+
+//------------------------------------------------------------------------------
+// frame::verify_deopt_original_pc
+//
+// Verifies the calculated original PC of a deoptimization PC for the
+// given unextended SP.
+#ifdef ASSERT
+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) {
+ frame fr;
+
+  // This is ugly, but it's better than changing {get,set}_original_pc
+ // to take an SP value as argument. And it's only a debugging
+ // method anyway.
+ fr._unextended_sp = unextended_sp;
+
+ assert_cond(nm != NULL);
+ address original_pc = nm->get_original_pc(&fr);
+  assert(nm->insts_contains_inclusive(original_pc),
+         "original PC must be in the main code section of the compiled method (or must be immediately following it)");
+}
+#endif
+
+//------------------------------------------------------------------------------
+// frame::adjust_unextended_sp
+void frame::adjust_unextended_sp() {
+ // On riscv, sites calling method handle intrinsics and lambda forms are treated
+ // as any other call site. Therefore, no special action is needed when we are
+ // returning to any of these call sites.
+
+ if (_cb != NULL) {
+ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null();
+ if (sender_cm != NULL) {
+ // If the sender PC is a deoptimization point, get the original PC.
+ if (sender_cm->is_deopt_entry(_pc) ||
+ sender_cm->is_deopt_mh_entry(_pc)) {
+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+// frame::update_map_with_saved_link
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
+ // The interpreter and compiler(s) always save fp in a known
+ // location on entry. We must record where that location is
+ // so that if fp was live on callout from c2 we can find
+ // the saved copy no matter what it called.
+
+  // Since the interpreter always saves fp, if we record where it is then
+  // we don't have to save fp on both entry and exit of c2 compiled
+  // code; saving it on entry is enough.
+ assert(map != NULL, "map must be set");
+ map->set_location(::fp->as_VMReg(), (address) link_addr);
+  // This is weird: "H" ought to be at a higher address, however the
+  // oopMaps seem to have the "H" regs at the same address as the
+  // vanilla register.
+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr);
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_interpreter_frame
+frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
+ // SP is the raw SP from the sender after adapter or interpreter
+ // extension.
+ intptr_t* sender_sp = this->sender_sp();
+
+ // This is the sp before any possible extension (adapter/locals).
+ intptr_t* unextended_sp = interpreter_frame_sender_sp();
+
+#ifdef COMPILER2
+ assert(map != NULL, "map must be set");
+ if (map->update_map()) {
+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
+ }
+#endif // COMPILER2
+
+ return frame(sender_sp, unextended_sp, link(), sender_pc());
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_compiled_frame
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+ // we cannot rely upon the last fp having been saved to the thread
+ // in C2 code but it will have been pushed onto the stack. so we
+ // have to find it relative to the unextended sp
+
+ assert(_cb->frame_size() >= 0, "must have non-zero frame size");
+ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
+ intptr_t* unextended_sp = l_sender_sp;
+
+ // the return_address is always the word on the stack
+ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset);
+
+ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset);
+
+ assert(map != NULL, "map must be set");
+ if (map->update_map()) {
+ // Tell GC to use argument oopmaps for some runtime stubs that need it.
+ // For C1, the runtime stub might not have oop maps, so set this flag
+ // outside of update_register_map.
+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+ if (_cb->oop_maps() != NULL) {
+ OopMapSet::update_register_map(this, map);
+ }
+
+    // Since the prolog does the save and restore of FP, there is no
+    // oopmap for it, so we must fill in its location as if there were
+    // an oopmap entry, since if our caller was compiled code there
+    // could be live jvm state in it.
+ update_map_with_saved_link(map, saved_fp_addr);
+ }
+
+ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+}
+
+//------------------------------------------------------------------------------
+// frame::sender_raw
+frame frame::sender_raw(RegisterMap* map) const {
+  // Default is we don't have to follow them. The sender_for_xxx will
+  // update it accordingly.
+ assert(map != NULL, "map must be set");
+ map->set_include_argument_oops(false);
+
+ if (is_entry_frame()) {
+ return sender_for_entry_frame(map);
+ }
+ if (is_interpreted_frame()) {
+ return sender_for_interpreter_frame(map);
+ }
+ assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
+ // This test looks odd: why is it not is_compiled_frame() ? That's
+ // because stubs also have OOP maps.
+ if (_cb != NULL) {
+ return sender_for_compiled_frame(map);
+ }
+
+ // Must be native-compiled frame, i.e. the marshaling code for native
+ // methods that exists in the core system.
+ return frame(sender_sp(), link(), sender_pc());
+}
+
+frame frame::sender(RegisterMap* map) const {
+ frame result = sender_raw(map);
+
+ if (map->process_frames()) {
+ StackWatermarkSet::on_iteration(map->thread(), result);
+ }
+
+ return result;
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+ assert(is_interpreted_frame(), "Not an interpreted frame");
+ // These are reasonable sanity checks
+ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) {
+ return false;
+ }
+ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) {
+ return false;
+ }
+ if (fp() + interpreter_frame_initial_sp_offset < sp()) {
+ return false;
+ }
+ // These are hacks to keep us out of trouble.
+ // The problem with these is that they mask other problems
+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above
+ return false;
+ }
+
+ // do some validation of frame elements
+
+ // first the method
+ Method* m = *interpreter_frame_method_addr();
+ // validate the method we'd find in this potential sender
+ if (!Method::is_valid_method(m)) {
+ return false;
+ }
+
+ // stack frames shouldn't be much larger than max_stack elements
+  // this test requires the use of unextended_sp, which is the sp as seen by
+  // the current frame, and not sp, which is the "raw" sp that could point
+  // further because of local variables of the callee method inserted after
+  // the method arguments
+ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) {
+ return false;
+ }
+
+ // validate bci/bcx
+ address bcp = interpreter_frame_bcp();
+ if (m->validate_bci_from_bcp(bcp) < 0) {
+ return false;
+ }
+
+ // validate constantPoolCache*
+ ConstantPoolCache* cp = *interpreter_frame_cache_addr();
+ if (MetaspaceObj::is_valid(cp) == false) {
+ return false;
+ }
+
+ // validate locals
+ address locals = (address) *interpreter_frame_locals_addr();
+ if (locals > thread->stack_base() || locals < (address) fp()) {
+ return false;
+ }
+
+  // We'd have to be pretty unlucky to be misled at this point
+ return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+ assert(is_interpreted_frame(), "interpreted frame expected");
+ Method* method = interpreter_frame_method();
+ BasicType type = method->result_type();
+
+ intptr_t* tos_addr = NULL;
+ if (method->is_native()) {
+ tos_addr = (intptr_t*)sp();
+ if (type == T_FLOAT || type == T_DOUBLE) {
+ // This is because we do a push(ltos) after push(dtos) in generate_native_entry.
+ tos_addr += 2 * Interpreter::stackElementWords;
+ }
+ } else {
+ tos_addr = (intptr_t*)interpreter_frame_tos_address();
+ }
+
+ switch (type) {
+ case T_OBJECT :
+ case T_ARRAY : {
+ oop obj;
+ if (method->is_native()) {
+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset));
+ } else {
+ oop* obj_p = (oop*)tos_addr;
+ obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
+ }
+ assert(Universe::is_in_heap_or_null(obj), "sanity check");
+ *oop_result = obj;
+ break;
+ }
+ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
+ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break;
+ case T_CHAR : value_result->c = *(jchar*)tos_addr; break;
+ case T_SHORT : value_result->s = *(jshort*)tos_addr; break;
+ case T_INT : value_result->i = *(jint*)tos_addr; break;
+ case T_LONG : value_result->j = *(jlong*)tos_addr; break;
+ case T_FLOAT : {
+ value_result->f = *(jfloat*)tos_addr;
+ break;
+ }
+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break;
+ case T_VOID : /* Nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+
+ return type;
+}
+
+
+intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+ return &interpreter_frame_tos_address()[index];
+}
+
+#ifndef PRODUCT
+
+#define DESCRIBE_FP_OFFSET(name) \
+ values.describe(frame_no, fp() + frame::name##_offset, #name)
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+ if (is_interpreted_frame()) {
+ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+ DESCRIBE_FP_OFFSET(interpreter_frame_method);
+ DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
+ DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
+ DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+ DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+ DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
+ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+ }
+}
+#endif
+
+intptr_t *frame::initial_deoptimization_info() {
+ // Not used on riscv, but we must return something.
+ return NULL;
+}
+
+intptr_t* frame::real_fp() const {
+ if (_cb != NULL) {
+ // use the frame size if valid
+ int size = _cb->frame_size();
+ if (size > 0) {
+ return unextended_sp() + size;
+ }
+ }
+ // else rely on fp()
+ assert(!is_compiled_frame(), "unknown compiled frame size");
+ return fp();
+}
+
+#undef DESCRIBE_FP_OFFSET
+
+#ifndef PRODUCT
+// This is a generic constructor which is only used by pns() in debug.cpp.
+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) {
+ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc);
+}
+
+#endif
+
+void JavaFrameAnchor::make_walkable(JavaThread* thread) {
+ // last frame set?
+ if (last_Java_sp() == NULL) { return; }
+ // already walkable?
+ if (walkable()) { return; }
+ vmassert(Thread::current() == (Thread*)thread, "not current thread");
+ vmassert(last_Java_sp() != NULL, "not called from Java code?");
+ vmassert(last_Java_pc() == NULL, "already walkable");
+ capture_last_Java_pc();
+ vmassert(walkable(), "something went wrong");
+}
+
+void JavaFrameAnchor::capture_last_Java_pc() {
+ vmassert(_last_Java_sp != NULL, "no last frame set");
+ vmassert(_last_Java_pc == NULL, "already walkable");
+ _last_Java_pc = (address)_last_Java_sp[-1];
+}
diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c06aaa9e391b439bd1bf8f91dba05c74592a9c4a
--- /dev/null
+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_FRAME_RISCV_HPP
+#define CPU_RISCV_FRAME_RISCV_HPP
+
+#include "runtime/synchronizer.hpp"
+
+// A frame represents a physical stack frame (an activation). Frames can be
+// C or Java frames, and the Java frames can be interpreted or compiled.
+// In contrast, vframes represent source-level activations, so that one physical frame
+// can correspond to multiple source level frames because of inlining.
+// A frame is comprised of {pc, fp, sp}
+// ------------------------------ Asm interpreter ----------------------------------------
+// Layout of asm interpreter frame:
+// [expression stack ] * <- sp
+
+// [monitors[0] ] \
+// ... | monitor block size = k
+// [monitors[k-1] ] /
+// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset
+// [byte code index/pointer] = bcx() bcx_offset
+
+// [pointer to locals ] = locals() locals_offset
+// [constant pool cache ] = cache() cache_offset
+
+// [klass of method ] = mirror() mirror_offset
+// [padding ]
+
+// [methodData ] = mdp() mdx_offset
+// [Method ] = method() method_offset
+
+// [last esp ] = last_sp() last_sp_offset
+// [old stack pointer ] (sender_sp) sender_sp_offset
+
+// [old frame pointer ]
+// [return pc ]
+
+// [last sp ] <- fp = link()
+// [oop temp ] (only for native calls)
+
+// [padding ] (to preserve machine SP alignment)
+// [locals and parameters ]
+// <- sender sp
+// ------------------------------ Asm interpreter ----------------------------------------
+
+// ------------------------------ C Frame ------------------------------------------------
+// Stack: gcc with -fno-omit-frame-pointer
+// .
+// .
+// +-> .
+// | +-----------------+ |
+// | | return address | |
+// | | previous fp ------+
+// | | saved registers |
+// | | local variables |
+// | | ... | <-+
+// | +-----------------+ |
+// | | return address | |
+// +------ previous fp | |
+// | saved registers | |
+// | local variables | |
+// +-> | ... | |
+// | +-----------------+ |
+// | | return address | |
+// | | previous fp ------+
+// | | saved registers |
+// | | local variables |
+// | | ... | <-+
+// | +-----------------+ |
+// | | return address | |
+// +------ previous fp | |
+// | saved registers | |
+// | local variables | |
+// $fp --> | ... | |
+// +-----------------+ |
+// | return address | |
+// | previous fp ------+
+// | saved registers |
+// $sp --> | local variables |
+// +-----------------+
+// ------------------------------ C Frame ------------------------------------------------
+
+ public:
+ enum {
+ pc_return_offset = 0,
+ // All frames
+ link_offset = -2,
+ return_addr_offset = -1,
+ sender_sp_offset = 0,
+ // Interpreter frames
+ interpreter_frame_oop_temp_offset = 1, // for native calls only
+
+ interpreter_frame_sender_sp_offset = -3,
+ // outgoing sp before a call to an invoked method
+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
+ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
+ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1,
+ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1,
+ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1,
+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,
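+    // With the chain above these resolve to fp-relative word slots:
+    //   sender_sp = -3, last_sp = -4, method = -5, mdp = -6, padding = -7,
+    //   mirror = -8, cache = -9, locals = -10, bcp = -11, initial_sp = -12.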
+
+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
+
+ // Entry frames
+ // n.b. these values are determined by the layout defined in
+ // stubGenerator for the Java call stub
+ entry_frame_after_call_words = 22,
+ entry_frame_call_wrapper_offset = -10,
+
+ // we don't need a save area
+ arg_reg_save_area_bytes = 0
+ };
+
+ intptr_t ptr_at(int offset) const {
+ return *ptr_at_addr(offset);
+ }
+
+ void ptr_at_put(int offset, intptr_t value) {
+ *ptr_at_addr(offset) = value;
+ }
+
+ private:
+ // an additional field beyond _sp and _pc:
+ intptr_t* _fp; // frame pointer
+ // The interpreter and adapters will extend the frame of the caller.
+ // Since oopMaps are based on the sp of the caller before extension
+ // we need to know that value. However in order to compute the address
+ // of the return address we need the real "raw" sp. Since sparc already
+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
+ // original sp we use that convention.
+
+ intptr_t* _unextended_sp;
+ void adjust_unextended_sp();
+
+ intptr_t* ptr_at_addr(int offset) const {
+ return (intptr_t*) addr_at(offset);
+ }
+
+#ifdef ASSERT
+ // Used in frame::sender_for_{interpreter,compiled}_frame
+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp);
+#endif
+
+ public:
+ // Constructors
+
+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
+
+ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc);
+
+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp);
+
+ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
+
+ // accessors for the instance variables
+ // Note: not necessarily the real 'frame pointer' (see real_fp)
+ intptr_t* fp() const { return _fp; }
+
+ inline address* sender_pc_addr() const;
+
+ // expression stack tos if we are nested in a java call
+ intptr_t* interpreter_frame_last_sp() const;
+
+  // helper to update a map with the callee-saved fp
+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
+
+ // deoptimization support
+ void interpreter_frame_set_last_sp(intptr_t* last_sp);
+
+ static jint interpreter_frame_expression_stack_direction() { return -1; }
+
+ // returns the sending frame, without applying any barriers
+ frame sender_raw(RegisterMap* map) const;
+
+#endif // CPU_RISCV_FRAME_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5ac1bf57f57dbd3359315e92388af27a43c98cb6
--- /dev/null
+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP
+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP
+
+#include "code/codeCache.hpp"
+#include "code/vmreg.inline.hpp"
+
+// Inline functions for RISCV frames:
+
+// Constructors:
+
+inline frame::frame() {
+ _pc = NULL;
+ _sp = NULL;
+ _unextended_sp = NULL;
+ _fp = NULL;
+ _cb = NULL;
+ _deopt_state = unknown;
+}
+
+static int spin;
+
+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
+ intptr_t a = intptr_t(ptr_sp);
+ intptr_t b = intptr_t(ptr_fp);
+ _sp = ptr_sp;
+ _unextended_sp = ptr_sp;
+ _fp = ptr_fp;
+ _pc = pc;
+ assert(pc != NULL, "no pc?");
+ _cb = CodeCache::find_blob(pc);
+ adjust_unextended_sp();
+
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
+ if (original_pc != NULL) {
+ _pc = original_pc;
+ _deopt_state = is_deoptimized;
+ } else {
+ _deopt_state = not_deoptimized;
+ }
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
+ init(ptr_sp, ptr_fp, pc);
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) {
+ intptr_t a = intptr_t(ptr_sp);
+ intptr_t b = intptr_t(ptr_fp);
+ _sp = ptr_sp;
+ _unextended_sp = unextended_sp;
+ _fp = ptr_fp;
+ _pc = pc;
+ assert(pc != NULL, "no pc?");
+ _cb = CodeCache::find_blob(pc);
+ adjust_unextended_sp();
+
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
+ if (original_pc != NULL) {
+ _pc = original_pc;
+    assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc),
+           "original PC must be in the main code section of the compiled method (or must be immediately following it)");
+ _deopt_state = is_deoptimized;
+ } else {
+ _deopt_state = not_deoptimized;
+ }
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) {
+ intptr_t a = intptr_t(ptr_sp);
+ intptr_t b = intptr_t(ptr_fp);
+ _sp = ptr_sp;
+ _unextended_sp = ptr_sp;
+ _fp = ptr_fp;
+ _pc = (address)(ptr_sp[-1]);
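+  // The pc is recovered from the word just below sp -- the same slot that
+  // JavaFrameAnchor::capture_last_Java_pc() reads.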
+
+ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+  // unlucky, the junk value could point to a zombie method and we'll die on the
+ // find_blob call. This is also why we can have no asserts on the validity
+ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+ // -> pd_last_frame should use a specialized version of pd_last_frame which could
+  // call a specialized frame constructor instead of this one.
+ // Then we could use the assert below. However this assert is of somewhat dubious
+ // value.
+
+ _cb = CodeCache::find_blob(_pc);
+ adjust_unextended_sp();
+
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
+ if (original_pc != NULL) {
+ _pc = original_pc;
+ _deopt_state = is_deoptimized;
+ } else {
+ _deopt_state = not_deoptimized;
+ }
+}
+
+// Accessors
+
+inline bool frame::equal(frame other) const {
+ bool ret = sp() == other.sp() &&
+ unextended_sp() == other.unextended_sp() &&
+ fp() == other.fp() &&
+ pc() == other.pc();
+ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction");
+ return ret;
+}
+
+// Return unique id for this frame. The id must have a value where we can distinguish
+// identity and younger/older relationship. NULL represents an invalid (incomparable)
+// frame.
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
+
+// Return true if the frame is older (less recent activation) than the frame represented by id
+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
+ return this->id() > id ; }
+
+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }
+
+inline intptr_t* frame::link_or_null() const {
+ intptr_t** ptr = (intptr_t **)addr_at(link_offset);
+ return os::is_readable_pointer(ptr) ? *ptr : NULL;
+}
+
+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; }
+
+// Return address
+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); }
+inline address frame::sender_pc() const { return *sender_pc_addr(); }
+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); }
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+ return (intptr_t**)addr_at(interpreter_frame_locals_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_last_sp() const {
+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset);
+}
+
+
+// Constant pool cache
+
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset);
+}
+
+// Method
+
+inline Method** frame::interpreter_frame_method_addr() const {
+ return (Method**)addr_at(interpreter_frame_method_offset);
+}
+
+// Mirror
+
+inline oop* frame::interpreter_frame_mirror_addr() const {
+ return (oop*)addr_at(interpreter_frame_mirror_offset);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+ intptr_t* last_sp = interpreter_frame_last_sp();
+ if (last_sp == NULL) {
+ return sp();
+ } else {
+ // sp() may have been extended or shrunk by an adapter. At least
+ // check that we don't fall behind the legal region.
+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end.
+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
+ return last_sp;
+ }
+}
+
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+ return (oop *)(fp() + interpreter_frame_oop_temp_offset);
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+ return BasicObjectLock::size();
+}
+
+
+// expression stack
+// (the max_stack arguments are used by the GC; see class FrameClosure)
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end();
+ return monitor_end-1;
+}
+
+
+// Entry frames
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset);
+}
+
+
+// Compiled frames
+PRAGMA_DIAG_PUSH
+PRAGMA_NONNULL_IGNORED
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+ oop* result_adr = (oop *)map->location(x10->as_VMReg());
+ guarantee(result_adr != NULL, "bad register save location");
+ return (*result_adr);
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+ oop* result_adr = (oop *)map->location(x10->as_VMReg());
+ guarantee(result_adr != NULL, "bad register save location");
+ *result_adr = obj;
+}
+PRAGMA_DIAG_POP
+
+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1c46b3947d3c88310adaae1f50314c2aa962768f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1BarrierSetAssembler.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+#include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
+#include "gc/g1/heapRegion.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.hpp"
+#ifdef COMPILER1
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/g1/c1/g1BarrierSetC1.hpp"
+#endif
+
+#define __ masm->
+
+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register addr, Register count, RegSet saved_regs) {
+ assert_cond(masm != NULL);
+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
+ if (!dest_uninitialized) {
+ Label done;
+ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ lwu(t0, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbu(t0, in_progress);
+ }
+ __ beqz(t0, done);
+
+ __ push_reg(saved_regs, sp);
+ if (count == c_rarg0) {
+ if (addr == c_rarg1) {
+ // exactly backwards!!
+ __ mv(t0, c_rarg0);
+ __ mv(c_rarg0, c_rarg1);
+ __ mv(c_rarg1, t0);
+ } else {
+ __ mv(c_rarg1, count);
+ __ mv(c_rarg0, addr);
+ }
+ } else {
+ __ mv(c_rarg0, addr);
+ __ mv(c_rarg1, count);
+ }
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2);
+ }
+ __ pop_reg(saved_regs, sp);
+
+ __ bind(done);
+ }
+}
+
+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register start, Register count, Register tmp, RegSet saved_regs) {
+ assert_cond(masm != NULL);
+ __ push_reg(saved_regs, sp);
+ assert_different_registers(start, count, tmp);
+ assert_different_registers(c_rarg0, count);
+ __ mv(c_rarg0, start);
+ __ mv(c_rarg1, count);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
+ __ pop_reg(saved_regs, sp);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call) {
+ // If expand_call is true then we expand the call_VM_leaf macro
+ // directly to skip generating the check by
+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+ assert_cond(masm != NULL);
+ assert(thread == xthread, "must be");
+
+ Label done;
+ Label runtime;
+
+ assert_different_registers(obj, pre_val, tmp, t0);
+ assert(pre_val != noreg && tmp != noreg, "expecting a register");
+
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
+ __ lwu(tmp, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbu(tmp, in_progress);
+ }
+ __ beqz(tmp, done);
+
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+
+ // Is the previous value null?
+ __ beqz(pre_val, done);
+
+ // Can we store original value in the thread's buffer?
+ // Is index == 0?
+ // (The index field is typed as size_t.)
+
+ __ ld(tmp, index); // tmp := *index_adr
+ __ beqz(tmp, runtime); // tmp == 0?
+ // If yes, goto runtime
+
+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
+ __ sd(tmp, index); // *index_adr := tmp
+ __ ld(t0, buffer);
+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
+
+ // Record the previous value
+ __ sd(pre_val, Address(tmp, 0));
+ __ j(done);
+
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(pre_val);
+ if (tosca_live) { saved += RegSet::of(x10); }
+ if (obj != noreg) { saved += RegSet::of(obj); }
+
+ __ push_reg(saved, sp);
+
+ if (expand_call) {
+ assert(pre_val != c_rarg1, "smashed arg");
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
+ }
+
+ __ pop_reg(saved, sp);
+
+ __ bind(done);
+
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2) {
+ assert_cond(masm != NULL);
+ assert(thread == xthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
+ t0);
+ assert(store_addr != noreg && new_val != noreg && tmp != noreg &&
+ tmp2 != noreg, "expecting a register");
+
+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+ BarrierSet* bs = BarrierSet::barrier_set();
+ CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
+ CardTable* ct = ctbs->card_table();
+
+ Label done;
+ Label runtime;
+
+ // Does store cross heap regions?
+
+ __ xorr(tmp, store_addr, new_val);
+ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
+ __ beqz(tmp, done);
+
+ // crosses regions, storing NULL?
+
+ __ beqz(new_val, done);
+
+ // storing region crossing non-NULL, is card already dirty?
+
+ ExternalAddress cardtable((address) ct->byte_map_base());
+ const Register card_addr = tmp;
+
+ __ srli(card_addr, store_addr, CardTable::card_shift());
+
+ // get the address of the card
+ __ load_byte_map_base(tmp2);
+ __ add(card_addr, card_addr, tmp2);
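+ // A card still marked young never needs post-barrier work, so skip it early.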
+ __ lbu(tmp2, Address(card_addr));
+ __ mv(t0, (int)G1CardTable::g1_young_card_val());
+ __ beq(tmp2, t0, done);
+
+ assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+
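+ // Order the earlier reference store before re-reading the card, so a concurrent
+ // refinement thread that cleaned the card cannot miss this update.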
+ __ membar(MacroAssembler::StoreLoad);
+
+ __ lbu(tmp2, Address(card_addr));
+ __ beqz(tmp2, done);
+
+ // storing a region crossing, non-NULL oop, card is clean.
+ // dirty card and log.
+
+ __ sb(zr, Address(card_addr));
+
+ __ ld(t0, queue_index);
+ __ beqz(t0, runtime);
+ __ sub(t0, t0, wordSize);
+ __ sd(t0, queue_index);
+
+ __ ld(tmp2, buffer);
+ __ add(t0, tmp2, t0);
+ __ sd(card_addr, Address(t0, 0));
+ __ j(done);
+
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(store_addr);
+ __ push_reg(saved, sp);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
+ __ pop_reg(saved, sp);
+
+ __ bind(done);
+}
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Register dst, Address src, Register tmp1, Register tmp_thread) {
+ assert_cond(masm != NULL);
+ bool on_oop = is_reference_type(type);
+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+ bool on_reference = on_weak || on_phantom;
+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+ if (on_oop && on_reference) {
+ // RA is live. It must be saved around calls.
+ __ enter(); // barrier may call runtime
+ // Generate the G1 pre-barrier code to log the value of
+ // the referent field in an SATB buffer.
+ g1_write_barrier_pre(masm /* masm */,
+ noreg /* obj */,
+ dst /* pre_val */,
+ xthread /* thread */,
+ tmp1 /* tmp */,
+ true /* tosca_live */,
+ true /* expand_call */);
+ __ leave();
+ }
+}
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2) {
+ assert_cond(masm != NULL);
+ // flatten object address if needed
+ if (dst.offset() == 0) {
+ if (dst.base() != x13) {
+ __ mv(x13, dst.base());
+ }
+ } else {
+ __ la(x13, dst);
+ }
+
+ g1_write_barrier_pre(masm,
+ x13 /* obj */,
+ tmp2 /* pre_val */,
+ xthread /* thread */,
+ tmp1 /* tmp */,
+ val != noreg /* tosca_live */,
+ false /* expand_call */);
+
+ if (val == noreg) {
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg);
+ } else {
+ // G1 barrier needs uncompressed oop for region cross check.
+ Register new_val = val;
+ if (UseCompressedOops) {
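+ // store_at below compresses val in place, so keep an uncompressed copy for the region-cross check.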
+ new_val = t1;
+ __ mv(new_val, val);
+ }
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg);
+ g1_write_barrier_post(masm,
+ x13 /* store_adr */,
+ new_val /* new_val */,
+ xthread /* thread */,
+ tmp1 /* tmp */,
+ tmp2 /* tmp2 */);
+ }
+}
+
+#ifdef COMPILER1
+
+#undef __
+#define __ ce->masm()->
+
+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+
+ // At this point we know that marking is in progress.
+ // If do_load() is true then we have to emit the
+ // load of the previous value; otherwise it has already
+ // been loaded into _pre_val.
+ __ bind(*stub->entry());
+
+ assert(stub->pre_val()->is_register(), "Precondition.");
+
+ Register pre_val_reg = stub->pre_val()->as_register();
+
+ if (stub->do_load()) {
+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */);
+ }
+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);
+ ce->store_parameter(stub->pre_val()->as_register(), 0);
+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
+ __ j(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+ __ bind(*stub->entry());
+ assert(stub->addr()->is_register(), "Precondition");
+ assert(stub->new_val()->is_register(), "Precondition");
+ Register new_val_reg = stub->new_val()->as_register();
+ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true);
+ ce->store_parameter(stub->addr()->as_pointer_register(), 0);
+ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
+ __ j(*stub->continuation());
+}
+
+#undef __
+
+#define __ sasm->
+
+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
+ __ prologue("g1_pre_barrier", false);
+
+ BarrierSet* bs = BarrierSet::barrier_set();
+
+ // arg0 : previous value of memory
+ const Register pre_val = x10;
+ const Register thread = xthread;
+ const Register tmp = t0;
+
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+ Label done;
+ Label runtime;
+
+ // Is marking still active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
+ __ lwu(tmp, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbu(tmp, in_progress);
+ }
+ __ beqz(tmp, done);
+
+ // Can we store original value in the thread's buffer?
+ __ ld(tmp, queue_index);
+ __ beqz(tmp, runtime);
+
+ __ sub(tmp, tmp, wordSize);
+ __ sd(tmp, queue_index);
+ __ ld(t1, buffer);
+ __ add(tmp, tmp, t1);
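+ // Reload stub argument 0 (the pre-value) and store it into the just-claimed SATB buffer slot.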
+ __ load_parameter(0, t1);
+ __ sd(t1, Address(tmp, 0));
+ __ j(done);
+
+ __ bind(runtime);
+ __ push_call_clobbered_registers();
+ __ load_parameter(0, pre_val);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ pop_call_clobbered_registers();
+ __ bind(done);
+
+ __ epilogue();
+}
+
+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
+ __ prologue("g1_post_barrier", false);
+
+ // arg0 : store_address
+ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp
+
+ BarrierSet* bs = BarrierSet::barrier_set();
+ CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
+ CardTable* ct = ctbs->card_table();
+
+ Label done;
+ Label runtime;
+
+ // At this point we know new_value is non-NULL and the new_value crosses regions.
+ // Must check to see if card is already dirty
+ const Register thread = xthread;
+
+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+ const Register card_offset = t1;
+ // RA is free here, so we can use it to hold the byte_map_base.
+ const Register byte_map_base = ra;
+
+ assert_different_registers(card_offset, byte_map_base, t0);
+
+ __ load_parameter(0, card_offset);
+ __ srli(card_offset, card_offset, CardTable::card_shift());
+ __ load_byte_map_base(byte_map_base);
+
+ // Convert card offset into an address in card_addr
+ Register card_addr = card_offset;
+ __ add(card_addr, byte_map_base, card_addr);
+
+ __ lbu(t0, Address(card_addr, 0));
+ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val());
+ __ beqz(t0, done);
+
+ assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+
+ __ membar(MacroAssembler::StoreLoad);
+ __ lbu(t0, Address(card_addr, 0));
+ __ beqz(t0, done);
+
+ // storing region crossing non-NULL, card is clean.
+ // dirty card and log.
+ __ sb(zr, Address(card_addr, 0));
+
+ __ ld(t0, queue_index);
+ __ beqz(t0, runtime);
+ __ sub(t0, t0, wordSize);
+ __ sd(t0, queue_index);
+
+ // Reuse RA to hold buffer_addr
+ const Register buffer_addr = ra;
+
+ __ ld(buffer_addr, buffer);
+ __ add(t0, buffer_addr, t0);
+ __ sd(card_addr, Address(t0, 0));
+ __ j(done);
+
+ __ bind(runtime);
+ __ push_call_clobbered_registers();
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
+ __ pop_call_clobbered_registers();
+ __ bind(done);
+ __ epilogue();
+}
+
+#undef __
+
+#endif // COMPILER1
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..37bc183f39c76873b06ceb512ccf648935569b72
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+#include "utilities/macros.hpp"
+
+#ifdef COMPILER1
+class LIR_Assembler;
+#endif
+class StubAssembler;
+class G1PreBarrierStub;
+class G1PostBarrierStub;
+
+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
+protected:
+ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register addr, Register count, RegSet saved_regs);
+ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register start, Register count, Register tmp, RegSet saved_regs);
+
+ void g1_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call);
+
+ void g1_write_barrier_post(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2);
+
+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2);
+
+public:
+#ifdef COMPILER1
+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
+ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
+
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+#endif
+
+ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Register dst, Address src, Register tmp1, Register tmp_thread);
+};
+
+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8735fd014ffff76b820519b55bf3b7bd4a0133d8
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP
+#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP
+
+const size_t G1MergeHeapRootsPrefetchCacheSize = 16;
+
+#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3c115a2ea02a5e077e9e74e53715817e815dad0d
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/classLoaderData.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "memory/universe.hpp"
+#include "runtime/jniHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Register dst, Address src, Register tmp1, Register tmp_thread) {
+ assert_cond(masm != NULL);
+
+ // RA is live. It must be saved around calls.
+
+ bool in_heap = (decorators & IN_HEAP) != 0;
+ bool in_native = (decorators & IN_NATIVE) != 0;
+ bool is_not_null = (decorators & IS_NOT_NULL) != 0;
+ switch (type) {
+ case T_OBJECT: // fall through
+ case T_ARRAY: {
+ if (in_heap) {
+ if (UseCompressedOops) {
+ __ lwu(dst, src);
+ if (is_not_null) {
+ __ decode_heap_oop_not_null(dst);
+ } else {
+ __ decode_heap_oop(dst);
+ }
+ } else {
+ __ ld(dst, src);
+ }
+ } else {
+ assert(in_native, "why else?");
+ __ ld(dst, src);
+ }
+ break;
+ }
+ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
+ case T_BYTE: __ load_signed_byte (dst, src); break;
+ case T_CHAR: __ load_unsigned_short(dst, src); break;
+ case T_SHORT: __ load_signed_short (dst, src); break;
+ case T_INT: __ lw (dst, src); break;
+ case T_LONG: __ ld (dst, src); break;
+ case T_ADDRESS: __ ld (dst, src); break;
+ case T_FLOAT: __ flw (f10, src); break;
+ case T_DOUBLE: __ fld (f10, src); break;
+ default: Unimplemented();
+ }
+}
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2) {
+ assert_cond(masm != NULL);
+ bool in_heap = (decorators & IN_HEAP) != 0;
+ bool in_native = (decorators & IN_NATIVE) != 0;
+ switch (type) {
+ case T_OBJECT: // fall through
+ case T_ARRAY: {
+ val = val == noreg ? zr : val;
+ if (in_heap) {
+ if (UseCompressedOops) {
+ assert(!dst.uses(val), "not enough registers");
+ if (val != zr) {
+ __ encode_heap_oop(val);
+ }
+ __ sw(val, dst);
+ } else {
+ __ sd(val, dst);
+ }
+ } else {
+ assert(in_native, "why else?");
+ __ sd(val, dst);
+ }
+ break;
+ }
+ case T_BOOLEAN:
+ __ andi(val, val, 0x1); // boolean is true if LSB is 1
+ __ sb(val, dst);
+ break;
+ case T_BYTE: __ sb(val, dst); break;
+ case T_CHAR: __ sh(val, dst); break;
+ case T_SHORT: __ sh(val, dst); break;
+ case T_INT: __ sw(val, dst); break;
+ case T_LONG: __ sd(val, dst); break;
+ case T_ADDRESS: __ sd(val, dst); break;
+ case T_FLOAT: __ fsw(f10, dst); break;
+ case T_DOUBLE: __ fsd(f10, dst); break;
+ default: Unimplemented();
+ }
+
+}
+
+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+ Register obj, Register tmp, Label& slowpath) {
+ assert_cond(masm != NULL);
+ // If mask changes we need to ensure that the inverse is still encodable as an immediate
+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1);
+ __ andi(obj, obj, ~JNIHandles::weak_tag_mask);
+ __ ld(obj, Address(obj, 0)); // *obj
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register tmp1,
+ Register tmp2,
+ Label& slow_case,
+ bool is_far) {
+ assert_cond(masm != NULL);
+ assert_different_registers(obj, tmp2);
+ assert_different_registers(obj, var_size_in_bytes);
+ Register end = tmp2;
+
+ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset()));
+ if (var_size_in_bytes == noreg) {
+ __ la(end, Address(obj, con_size_in_bytes));
+ } else {
+ __ add(end, obj, var_size_in_bytes);
+ }
+ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset()));
+ __ bgtu(end, t0, slow_case, is_far);
+
+ // update the tlab top pointer
+ __ sd(end, Address(xthread, JavaThread::tlab_top_offset()));
+
+ // recover var_size_in_bytes if necessary
+ if (var_size_in_bytes == end) {
+ __ sub(var_size_in_bytes, var_size_in_bytes, obj);
+ }
+}
+
+// Defines obj, preserves var_size_in_bytes
+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register tmp1,
+ Label& slow_case,
+ bool is_far) {
+ assert_cond(masm != NULL);
+ assert_different_registers(obj, var_size_in_bytes, tmp1);
+ if (!Universe::heap()->supports_inline_contig_alloc()) {
+ __ j(slow_case);
+ } else {
+ Register end = tmp1;
+ Label retry;
+ __ bind(retry);
+
+ // Get the current end of the heap
+ ExternalAddress address_end((address) Universe::heap()->end_addr());
+ {
+ int32_t offset;
+ __ la_patchable(t1, address_end, offset);
+ __ ld(t1, Address(t1, offset));
+ }
+
+ // Get the current top of the heap
+ ExternalAddress address_top((address) Universe::heap()->top_addr());
+ {
+ int32_t offset;
+ __ la_patchable(t0, address_top, offset);
+ __ addi(t0, t0, offset);
+ __ lr_d(obj, t0, Assembler::aqrl);
+ }
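+ // obj now holds the reserved heap top; the sc_d below only succeeds if no other thread has bumped it since.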
+
+ // Adjust it by the size of our new object
+ if (var_size_in_bytes == noreg) {
+ __ la(end, Address(obj, con_size_in_bytes));
+ } else {
+ __ add(end, obj, var_size_in_bytes);
+ }
+
+ // if end < obj then we wrapped around high memory
+ __ bltu(end, obj, slow_case, is_far);
+
+ __ bgtu(end, t1, slow_case, is_far);
+
+ // If heap_top hasn't been changed by some other thread, update it.
+ __ sc_d(t1, end, t0, Assembler::rl);
+ __ bnez(t1, retry);
+
+ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1);
+ }
+}
+
+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register tmp1) {
+ assert_cond(masm != NULL);
+ assert(tmp1->is_valid(), "need temp reg");
+
+ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ if (var_size_in_bytes->is_valid()) {
+ __ add(tmp1, tmp1, var_size_in_bytes);
+ } else {
+ __ add(tmp1, tmp1, con_size_in_bytes);
+ }
+ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset())));
+}
+
+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+
+ if (bs_nm == NULL) {
+ return;
+ }
+
+ // RISCV atomic operations require that the memory address be naturally aligned.
+ __ align(4);
+
+ Label skip, guard;
+ Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset()));
+
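+ // Load the guard value that is embedded at the 'guard' label at the end of this barrier.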
+ __ lwu(t0, guard);
+
+ // Subsequent loads of oops must occur after load of guard value.
+ // BarrierSetNMethod::disarm sets guard with release semantics.
+ __ membar(MacroAssembler::LoadLoad);
+ __ lwu(t1, thread_disarmed_addr);
+ __ beq(t0, t1, skip);
+
+ int32_t offset = 0;
+ __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset);
+ __ jalr(ra, t0, offset);
+ __ j(skip);
+
+ __ bind(guard);
+
+ assert(__ offset() % 4 == 0, "bad alignment");
+ __ emit_int32(0); // nmethod guard value. Skipped over in common case.
+
+ __ bind(skip);
+}
+
+void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
+ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
+ if (bs == NULL) {
+ return;
+ }
+
+ Label bad_call;
+ __ beqz(xmethod, bad_call);
+
+ // Pointer chase to the method holder to find out if the method is concurrently unloading.
+ Label method_live;
+ __ load_method_holder_cld(t0, xmethod);
+
+ // Is it a strong CLD?
+ __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset()));
+ __ bnez(t1, method_live);
+
+ // Is it a weak but alive CLD?
+ __ push_reg(RegSet::of(x28, x29), sp);
+
+ __ ld(x28, Address(t0, ClassLoaderData::holder_offset()));
+
+ // Uses x28 & x29, so we must pass new temporaries.
+ __ resolve_weak_handle(x28, x29);
+ __ mv(t0, x28);
+
+ __ pop_reg(RegSet::of(x28, x29), sp);
+
+ __ bnez(t0, method_live);
+
+ __ bind(bad_call);
+
+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+ __ bind(method_live);
+}
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b85f7f5582b2a8a4a0ee82f0e427d720b1cefd72
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
+#include "memory/allocation.hpp"
+#include "oops/access.hpp"
+
+class BarrierSetAssembler: public CHeapObj<mtGC> {
+private:
+ void incr_allocated_bytes(MacroAssembler* masm,
+ Register var_size_in_bytes, int con_size_in_bytes,
+ Register t1 = noreg);
+
+public:
+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register src, Register dst, Register count, RegSet saved_regs) {}
+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register start, Register end, Register tmp, RegSet saved_regs) {}
+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Register dst, Address src, Register tmp1, Register tmp_thread);
+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2);
+
+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+ Register obj, Register tmp, Label& slowpath);
+
+ virtual void tlab_allocate(MacroAssembler* masm,
+ Register obj, // result: pointer to object after successful allocation
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+ int con_size_in_bytes, // object size in bytes if known at compile time
+ Register tmp1, // temp register
+ Register tmp2, // temp register
+ Label& slow_case, // continuation point if fast allocation fails
+ bool is_far = false
+ );
+
+ void eden_allocate(MacroAssembler* masm,
+ Register obj, // result: pointer to object after successful allocation
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+ int con_size_in_bytes, // object size in bytes if known at compile time
+ Register tmp1, // temp register
+ Label& slow_case, // continuation point if fast allocation fails
+ bool is_far = false
+ );
+ virtual void barrier_stubs_init() {}
+
+ virtual void nmethod_entry_barrier(MacroAssembler* masm);
+ virtual void c2i_entry_barrier(MacroAssembler* masm);
+ virtual ~BarrierSetAssembler() {}
+};
+
+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae7ee4c5a44a04da679b16583b7a7a2fb4b8e9fd
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/nativeInst.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
+#include "logging/log.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/registerMap.hpp"
+#include "runtime/thread.hpp"
+#include "utilities/align.hpp"
+#include "utilities/debug.hpp"
+
+class NativeNMethodBarrier: public NativeInstruction {
+ address instruction_address() const { return addr_at(0); }
+
+ int *guard_addr() {
+ /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */
+ return reinterpret_cast<int*>(instruction_address() + 12 * 4);
+ }
+
+public:
+ int get_value() {
+ return Atomic::load_acquire(guard_addr());
+ }
+
+ void set_value(int value) {
+ Atomic::release_store(guard_addr(), value);
+ }
+
+ void verify() const;
+};
+
+// Store the instruction bitmask, bits and name for checking the barrier.
+struct CheckInsn {
+ uint32_t mask;
+ uint32_t bits;
+ const char *name;
+};
+
+static const struct CheckInsn barrierInsn[] = {
+ { 0x00000fff, 0x00000297, "auipc t0, 0 "},
+ { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "},
+ { 0xffffffff, 0x0aa0000f, "fence ir, ir "},
+ { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"},
+ { 0x01fff07f, 0x00628063, "beq t0, t1, skip "},
+ { 0x00000fff, 0x000002b7, "lui t0, imm0 "},
+ { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "},
+ { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "},
+ { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "},
+ { 0xffffffff, 0x00529293, "slli t0, t0, 5 "},
+ { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "},
+ { 0x00000fff, 0x0000006f, "j skip "}
+ /* guard: */
+ /* 32bit nmethod guard value */
+ /* skip: */
+};
+
+// The encodings must match the instructions emitted by
+// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific
+// register numbers and immediate values in the encoding.
+void NativeNMethodBarrier::verify() const {
+ intptr_t addr = (intptr_t) instruction_address();
+ for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) {
+ uint32_t inst = *((uint32_t*) addr);
+ if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) {
+ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst);
+ fatal("not an %s instruction.", barrierInsn[i].name);
+ }
+ addr += 4;
+ }
+}
+
+
+/* We're called from an nmethod when we need to deoptimize it. We do
+ this by throwing away the nmethod's frame and jumping to the
+ ic_miss stub. This looks like there has been an IC miss at the
+ entry of the nmethod, so we resolve the call, which will fall back
+ to the interpreter if the nmethod has been unloaded. */
+void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
+
+ typedef struct {
+ intptr_t *sp; intptr_t *fp; address ra; address pc;
+ } frame_pointers_t;
+
+ frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5);
+
+ JavaThread *thread = JavaThread::current();
+ RegisterMap reg_map(thread, false);
+ frame frame = thread->last_frame();
+
+ assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be");
+ assert(frame.cb() == nm, "must be");
+ frame = frame.sender(&reg_map);
+
+ LogTarget(Trace, nmethod, barrier) out;
+ if (out.is_enabled()) {
+ ResourceMark mark;
+ log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p",
+ nm->method()->name_and_sig_as_C_string(),
+ nm, *(address *) return_address_ptr, nm->is_osr_method(), thread,
+ thread->name(), frame.sp(), nm->verified_entry_point());
+ }
+
+ new_frame->sp = frame.sp();
+ new_frame->fp = frame.fp();
+ new_frame->ra = frame.pc();
+ new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
+}
+
+// This is the offset of the entry barrier from where the frame is completed.
+// If any code changes between the end of the verified entry where the entry
+// barrier resides, and the completion of the frame, then
+// NativeNMethodCmpBarrier::verify() will immediately complain when it does
+// not find the expected native instruction at this offset, which needs updating.
+// Note that this offset is invariant of PreserveFramePointer.
+
+// see BarrierSetAssembler::nmethod_entry_barrier
+// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32
+static const int entry_barrier_offset = -4 * 13;
+
+static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
+ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset;
+ NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
+ debug_only(barrier->verify());
+ return barrier;
+}
+
+void BarrierSetNMethod::disarm(nmethod* nm) {
+ if (!supports_entry_barrier(nm)) {
+ return;
+ }
+
+ // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
+
+ barrier->set_value(disarmed_value());
+}
+
+bool BarrierSetNMethod::is_armed(nmethod* nm) {
+ if (!supports_entry_barrier(nm)) {
+ return false;
+ }
+
+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
+ return barrier->get_value() != disarmed_value();
+}
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a419f92b5f6fc75a74f305dfb189475a6e14ac65
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "gc/shared/cardTableBarrierSetAssembler.hpp"
+#include "gc/shared/gc_globals.hpp"
+#include "interpreter/interp_masm.hpp"
+
+#define __ masm->
+
+
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) {
+ assert_cond(masm != NULL);
+ assert_different_registers(obj, tmp);
+ BarrierSet* bs = BarrierSet::barrier_set();
+ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind");
+
+ __ srli(obj, obj, CardTable::card_shift());
+
+ assert(CardTable::dirty_card_val() == 0, "must be");
+
+ __ load_byte_map_base(tmp);
+ __ add(tmp, obj, tmp);
+
+ if (UseCondCardMark) {
+ Label L_already_dirty;
+ __ membar(MacroAssembler::StoreLoad);
+ __ lbu(t1, Address(tmp));
+ __ beqz(t1, L_already_dirty);
+ __ sb(zr, Address(tmp));
+ __ bind(L_already_dirty);
+ } else {
+ __ sb(zr, Address(tmp));
+ }
+}
+
+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register start, Register count, Register tmp, RegSet saved_regs) {
+ assert_cond(masm != NULL);
+ assert_different_registers(start, tmp);
+ assert_different_registers(count, tmp);
+
+ Label L_loop, L_done;
+ const Register end = count;
+
+ __ beqz(count, L_done); // zero count - nothing to do
+ // end = start + count << LogBytesPerHeapOop
+ __ shadd(end, count, start, count, LogBytesPerHeapOop);
+ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive
+
+ __ srli(start, start, CardTable::card_shift());
+ __ srli(end, end, CardTable::card_shift());
+ __ sub(count, end, start); // number of card table entries to dirty, minus one
+
+ __ load_byte_map_base(tmp);
+ __ add(start, start, tmp);
+
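+ // Dirty every card in the range, walking from the last card back down to the first.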
+ __ bind(L_loop);
+ __ add(tmp, start, count);
+ __ sb(zr, Address(tmp));
+ __ sub(count, count, 1);
+ __ bgez(count, L_loop);
+ __ bind(L_done);
+}
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2) {
+ bool in_heap = (decorators & IN_HEAP) != 0;
+ bool is_array = (decorators & IS_ARRAY) != 0;
+ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+ bool precise = is_array || on_anonymous;
+
+ bool needs_post_barrier = val != noreg && in_heap;
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg);
+ if (needs_post_barrier) {
+ // flatten object address if needed
+ if (!precise || dst.offset() == 0) {
+ store_check(masm, dst.base(), x13);
+ } else {
+ assert_cond(masm != NULL);
+ __ la(x13, dst);
+ store_check(masm, x13, t0);
+ }
+ }
+}
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..686fe8fa4786186f4d760b6193235ccdc56a2beb
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+
+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {
+protected:
+ void store_check(MacroAssembler* masm, Register obj, Register tmp);
+
+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register start, Register count, Register tmp, RegSet saved_regs);
+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2);
+};
+
+#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7aa2015f9ec0ec5bae86425eb43a828833c846d5
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+
+#define __ masm->
+
+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register src, Register dst, Register count, RegSet saved_regs) {
+
+ if (is_oop) {
+ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs);
+ }
+}
+
+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register start, Register count, Register tmp,
+ RegSet saved_regs) {
+ if (is_oop) {
+ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs);
+ }
+}
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2) {
+ if (is_reference_type(type)) {
+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+ } else {
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+ }
+}
diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..00419c3163c2da38ac65af6f32ecaf72d19ad572
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected
+// accesses, which are overridden in the concrete BarrierSetAssembler.
+
+class ModRefBarrierSetAssembler: public BarrierSetAssembler {
+protected:
+ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register addr, Register count, RegSet saved_regs) {}
+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+ Register start, Register count, Register tmp, RegSet saved_regs) {}
+
+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2) = 0;
+
+public:
+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register src, Register dst, Register count, RegSet saved_regs);
+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register start, Register count, Register tmp, RegSet saved_regs);
+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2);
+};
+
+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cd568cc723fe9d05a97a35de84c3df73b0bb1baf
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/shared/gc_globals.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
+
+#define __ masm->masm()->
+
+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) {
+ Register addr = _addr->as_register_lo();
+ Register newval = _new_value->as_register();
+ Register cmpval = _cmp_value->as_register();
+ Register tmp1 = _tmp1->as_register();
+ Register tmp2 = _tmp2->as_register();
+ Register result = result_opr()->as_register();
+
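+ // Apply the IU barrier to the new value before the CAS can publish it.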
+ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1);
+
+ if (UseCompressedOops) {
+ __ encode_heap_oop(tmp1, cmpval);
+ cmpval = tmp1;
+ __ encode_heap_oop(tmp2, newval);
+ newval = tmp2;
+ }
+
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq,
+ /* release */ Assembler::rl, /* is_cae */ false, result);
+}
+
+#undef __
+
+#ifdef ASSERT
+#define __ gen->lir(__FILE__, __LINE__)->
+#else
+#define __ gen->lir()->
+#endif
+
+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) {
+ BasicType bt = access.type();
+ if (access.is_oop()) {
+ LIRGenerator *gen = access.gen();
+ if (ShenandoahSATBBarrier) {
+ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(),
+ LIR_OprFact::illegalOpr /* pre_val */);
+ }
+ if (ShenandoahCASBarrier) {
+ cmp_value.load_item();
+ new_value.load_item();
+
+ LIR_Opr tmp1 = gen->new_register(T_OBJECT);
+ LIR_Opr tmp2 = gen->new_register(T_OBJECT);
+ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base();
+ LIR_Opr result = gen->new_register(T_INT);
+
+ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result));
+ return result;
+ }
+ }
+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
+}
+
+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) {
+ LIRGenerator* gen = access.gen();
+ BasicType type = access.type();
+
+ LIR_Opr result = gen->new_register(type);
+ value.load_item();
+ LIR_Opr value_opr = value.result();
+
+ if (access.is_oop()) {
+ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators());
+ }
+
+ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type");
+ LIR_Opr tmp = gen->new_register(T_INT);
+ __ xchg(access.resolved_addr(), value_opr, result, tmp);
+
+ if (access.is_oop()) {
+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators());
+ LIR_Opr tmp_opr = gen->new_register(type);
+ __ move(result, tmp_opr);
+ result = tmp_opr;
+ if (ShenandoahSATBBarrier) {
+ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr,
+ result /* pre_val */);
+ }
+ }
+
+ return result;
+}
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d0ac6e524364aa7acc8368b68e7ec4903bb40f14
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -0,0 +1,712 @@
+/*
+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahForwarding.hpp"
+#include "gc/shenandoah/shenandoahHeap.inline.hpp"
+#include "gc/shenandoah/shenandoahHeapRegion.hpp"
+#include "gc/shenandoah/shenandoahRuntime.hpp"
+#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.hpp"
+#ifdef COMPILER1
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
+#endif
+
+#define __ masm->
+
+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register src, Register dst, Register count, RegSet saved_regs) {
+ if (is_oop) {
+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) {
+
+ Label done;
+
+ // Avoid calling runtime if count == 0
+ __ beqz(count, done);
+
+ // Is GC active?
+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+ assert_different_registers(src, dst, count, t0);
+
+ __ lbu(t0, gc_state);
+ if (ShenandoahSATBBarrier && dest_uninitialized) {
+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED);
+ __ beqz(t0, done);
+ } else {
+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
+ __ beqz(t0, done);
+ }
+
+ __ push_reg(saved_regs, sp);
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
+ src, dst, count);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count);
+ }
+ __ pop_reg(saved_regs, sp);
+ __ bind(done);
+ }
+ }
+}
+
+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call) {
+ if (ShenandoahSATBBarrier) {
+ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
+ }
+}
+
+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call) {
+ // If expand_call is true then we expand the call_VM_leaf macro
+ // directly to skip generating the check by
+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+ assert(thread == xthread, "must be");
+
+ Label done;
+ Label runtime;
+
+ assert_different_registers(obj, pre_val, tmp, t0);
+ assert(pre_val != noreg && tmp != noreg, "expecting a register");
+
+ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
+ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
+
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ lwu(tmp, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbu(tmp, in_progress);
+ }
+ __ beqz(tmp, done);
+
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+
+ // Is the previous value null?
+ __ beqz(pre_val, done);
+
+ // Can we store original value in the thread's buffer?
+ // Is index == 0?
+ // (The index field is typed as size_t.)
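+ // In effect (sketch):
+ //   if (index == 0) goto runtime;
+ //   index -= wordSize;
+ //   *(buffer + index) = pre_val;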
+ __ ld(tmp, index); // tmp := *index_adr
+ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime
+
+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
+ __ sd(tmp, index); // *index_adr := tmp
+ __ ld(t0, buffer);
+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
+
+ // Record the previous value
+ __ sd(pre_val, Address(tmp, 0));
+ __ j(done);
+
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(pre_val);
+ if (tosca_live) saved += RegSet::of(x10);
+ if (obj != noreg) saved += RegSet::of(obj);
+
+ __ push_reg(saved, sp);
+
+ // Calling the runtime using the regular call_VM_leaf mechanism generates
+ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+ // that checks that *(rfp + frame::interpreter_frame_last_sp) == NULL.
+ //
+ // If we are generating the pre-barrier without a frame (e.g. in the
+ // intrinsified Reference.get() routine) then fp might be pointing to
+ // the caller frame and so this check will most likely fail at runtime.
+ //
+ // Expanding the call directly bypasses the generation of the check.
+ // So when we do not have a full interpreter frame on the stack,
+ // expand_call should be passed true.
+ if (expand_call) {
+ assert(pre_val != c_rarg1, "smashed arg");
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ }
+
+ __ pop_reg(saved, sp);
+
+ __ bind(done);
+}
+
+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
+
+ Label is_null;
+ __ beqz(dst, is_null);
+ resolve_forward_pointer_not_null(masm, dst, tmp);
+ __ bind(is_null);
+}
+
+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly
+// passed in.
+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
+ // The code below loads the mark word, checks whether the lowest two bits are
+ // set, and if so, clears the lowest two bits and copies the result
+ // to dst. Otherwise it leaves dst alone.
+ // Implementing this is surprisingly awkward. It is done here by:
+ // - Inverting the mark word
+ // - Testing whether the lowest two bits are 0
+ // - If so, setting the lowest two bits
+ // - Inverting the result back, and copying it to dst
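+ // Roughly, in C-like pseudocode:
+ //   m = ~mark;
+ //   if ((m & lock_mask_in_place) == 0)   // i.e. both lock bits were set in mark
+ //     dst = ~(m | marked_value);         // mark with the low two bits cleared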
+ RegSet saved_regs = RegSet::of(t2);
+ bool borrow_reg = (tmp == noreg);
+ if (borrow_reg) {
+ // No free registers available. Make one useful.
+ tmp = t0;
+ if (tmp == dst) {
+ tmp = t1;
+ }
+ saved_regs += RegSet::of(tmp);
+ }
+
+ assert_different_registers(tmp, dst, t2);
+ __ push_reg(saved_regs, sp);
+
+ Label done;
+ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
+ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1
+ __ andi(t2, tmp, markWord::lock_mask_in_place);
+ __ bnez(t2, done);
+ __ ori(tmp, tmp, markWord::marked_value);
+ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1
+ __ bind(done);
+
+ __ pop_reg(saved_regs, sp);
+}
+
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
+ Register dst,
+ Address load_addr,
+ DecoratorSet decorators) {
+ assert(ShenandoahLoadRefBarrier, "Should be enabled");
+ assert(dst != t1 && load_addr.base() != t1, "need t1");
+ assert_different_registers(load_addr.base(), t0, t1);
+
+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
+ bool is_narrow = UseCompressedOops && !is_native;
+
+ Label heap_stable, not_cset;
+ __ enter();
+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+ __ lbu(t1, gc_state);
+
+ // Check for heap stability
+ if (is_strong) {
+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED);
+ __ beqz(t1, heap_stable);
+ } else {
+ Label lrb;
+ __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS);
+ __ bnez(t0, lrb);
+ __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED);
+ __ beqz(t0, heap_stable);
+ __ bind(lrb);
+ }
+
+ // use x11 for load address
+ Register result_dst = dst;
+ if (dst == x11) {
+ __ mv(t1, dst);
+ dst = t1;
+ }
+
+ // Save x10 and x11, unless it is an output register
+ RegSet saved_regs = RegSet::of(x10, x11) - result_dst;
+ __ push_reg(saved_regs, sp);
+ __ la(x11, load_addr);
+ __ mv(x10, dst);
+
+ // Test for in-cset
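+ // (Sketch: in_cset = in_cset_fast_test_addr()[oop >> region_size_bytes_shift] & 1.)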
+ if (is_strong) {
+ __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr());
+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ __ add(t1, t1, t0);
+ __ lbu(t1, Address(t1));
+ __ andi(t0, t1, 1);
+ __ beqz(t0, not_cset);
+ }
+
+ __ push_call_clobbered_registers();
+ if (is_strong) {
+ if (is_narrow) {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow);
+ } else {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong);
+ }
+ } else if (is_weak) {
+ if (is_narrow) {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow);
+ } else {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak);
+ }
+ } else {
+ assert(is_phantom, "only remaining strength");
+ assert(!is_narrow, "phantom access cannot be narrow");
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom);
+ }
+ __ jalr(ra);
+ __ mv(t0, x10);
+ __ pop_call_clobbered_registers();
+ __ mv(x10, t0);
+ __ bind(not_cset);
+ __ mv(result_dst, x10);
+ __ pop_reg(saved_regs, sp);
+
+ __ bind(heap_stable);
+ __ leave();
+}
+
+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) {
+ if (ShenandoahIUBarrier) {
+ __ push_call_clobbered_registers();
+
+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false);
+
+ __ pop_call_clobbered_registers();
+ }
+}
+
+//
+// Arguments:
+//
+// Inputs:
+// src: oop location to load from, might be clobbered
+//
+// Output:
+// dst: oop loaded from src location
+//
+// Kill:
+// x30 (tmp reg)
+//
+// Alias:
+// dst: x30 (might use x30 as temporary output register to avoid clobbering src)
+//
+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm,
+ DecoratorSet decorators,
+ BasicType type,
+ Register dst,
+ Address src,
+ Register tmp1,
+ Register tmp_thread) {
+ // 1: non-reference load, no additional barrier is needed
+ if (!is_reference_type(type)) {
+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+ return;
+ }
+
+ // 2: load a reference from src location and apply LRB if needed
+ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
+ Register result_dst = dst;
+
+ // Preserve src location for LRB
+ RegSet saved_regs;
+ if (dst == src.base()) {
+ dst = (src.base() == x28) ? x29 : x28;
+ saved_regs = RegSet::of(dst);
+ __ push_reg(saved_regs, sp);
+ }
+ assert_different_registers(dst, src.base());
+
+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+
+ load_reference_barrier(masm, dst, src, decorators);
+
+ if (dst != result_dst) {
+ __ mv(result_dst, dst);
+ dst = result_dst;
+ }
+
+ if (saved_regs.bits() != 0) {
+ __ pop_reg(saved_regs, sp);
+ }
+ } else {
+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+ }
+
+ // 3: apply keep-alive barrier if needed
+ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
+ __ enter();
+ __ push_call_clobbered_registers();
+ satb_write_barrier_pre(masm /* masm */,
+ noreg /* obj */,
+ dst /* pre_val */,
+ xthread /* thread */,
+ tmp1 /* tmp */,
+ true /* tosca_live */,
+ true /* expand_call */);
+ __ pop_call_clobbered_registers();
+ __ leave();
+ }
+}
+
+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2) {
+ bool on_oop = is_reference_type(type);
+ if (!on_oop) {
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+ return;
+ }
+
+ // flatten object address if needed
+ if (dst.offset() == 0) {
+ if (dst.base() != x13) {
+ __ mv(x13, dst.base());
+ }
+ } else {
+ __ la(x13, dst);
+ }
+
+ shenandoah_write_barrier_pre(masm,
+ x13 /* obj */,
+ tmp2 /* pre_val */,
+ xthread /* thread */,
+ tmp1 /* tmp */,
+ val != noreg /* tosca_live */,
+ false /* expand_call */);
+
+ if (val == noreg) {
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg);
+ } else {
+ iu_barrier(masm, val, tmp1);
+ // G1 barrier needs uncompressed oop for region cross check.
+ Register new_val = val;
+ if (UseCompressedOops) {
+ new_val = t1;
+ __ mv(new_val, val);
+ }
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg);
+ }
+}
+
+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+ Register obj, Register tmp, Label& slowpath) {
+ Label done;
+ // Resolve jobject
+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
+
+ // Check for null.
+ __ beqz(obj, done);
+
+ assert(obj != t1, "need t1");
+ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
+ __ lbu(t1, gc_state);
+
+ // Check for heap in evacuation phase
+ __ andi(t0, t1, ShenandoahHeap::EVACUATION);
+ __ bnez(t0, slowpath);
+
+ __ bind(done);
+}
+
+// Special Shenandoah CAS implementation that handles false negatives due
+// to concurrent evacuation. The service is more complex than a
+// traditional CAS operation because the CAS operation is intended to
+// succeed if the reference at addr exactly matches expected or if the
+// reference at addr holds a pointer to a from-space object that has
+// been relocated to the location named by expected. There are two
+// races that must be addressed:
+// a) A parallel thread may mutate the contents of addr so that it points
+// to a different object. In this case, the CAS operation should fail.
+// b) A parallel thread may heal the contents of addr, replacing a
+// from-space pointer held in addr with the to-space pointer
+// representing the new location of the object.
+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL
+// or it refers to an object that is not being evacuated out of
+// from-space, or it refers to the to-space version of an object that
+// is being evacuated out of from-space.
+//
+// By default the value held in the result register following execution
+// of the generated code sequence is 0 to indicate failure of CAS,
+// non-zero to indicate success. If is_cae, the result is the value most
+// recently fetched from addr rather than a boolean success indicator.
+//
+// Clobbers t0, t1
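+//
+// Rough sketch of the generated sequence (not literal code):
+//   retry:
+//     old = CAS(addr, expected, new_val);
+//     if (old == expected)                    return success;
+//     if (resolve_forwarded(old) != expected) return failure;
+//     // addr held a stale from-space copy of expected; try once more against it
+//     if (CAS(addr, old, new_val) != old)     goto retry;
+//     return success;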
+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
+ Register addr,
+ Register expected,
+ Register new_val,
+ Assembler::Aqrl acquire,
+ Assembler::Aqrl release,
+ bool is_cae,
+ Register result) {
+ bool is_narrow = UseCompressedOops;
+ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64;
+
+ assert_different_registers(addr, expected, t0, t1);
+ assert_different_registers(addr, new_val, t0, t1);
+
+ Label retry, success, fail, done;
+
+ __ bind(retry);
+
+ // Step1: Try to CAS.
+ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1);
+
+ // If success, then we are done.
+ __ beq(expected, t1, success);
+
+ // Step2: CAS failed, check the forwarded pointer.
+ __ mv(t0, t1);
+
+ if (is_narrow) {
+ __ decode_heap_oop(t0, t0);
+ resolve_forward_pointer(masm, t0);
+ __ encode_heap_oop(t0, t0);
+ } else {
+ resolve_forward_pointer(masm, t0);
+ }
+
+ // Report failure when the forwarded oop was not expected.
+ __ bne(t0, expected, fail);
+
+ // Step 3: CAS again using the forwarded oop.
+ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0);
+
+ // Retry when failed.
+ __ bne(t0, t1, retry);
+
+ __ bind(success);
+ if (is_cae) {
+ __ mv(result, expected);
+ } else {
+ __ addi(result, zr, 1);
+ }
+ __ j(done);
+
+ __ bind(fail);
+ if (is_cae) {
+ __ mv(result, t0);
+ } else {
+ __ mv(result, zr);
+ }
+
+ __ bind(done);
+}
+
+#undef __
+
+#ifdef COMPILER1
+
+#define __ ce->masm()->
+
+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+ // At this point we know that marking is in progress.
+ // If do_load() is true then we have to emit the
+ // load of the previous value; otherwise it has already
+ // been loaded into _pre_val.
+ __ bind(*stub->entry());
+
+ assert(stub->pre_val()->is_register(), "Precondition.");
+
+ Register pre_val_reg = stub->pre_val()->as_register();
+
+ if (stub->do_load()) {
+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */);
+ }
+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);
+ ce->store_parameter(stub->pre_val()->as_register(), 0);
+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
+ __ j(*stub->continuation());
+}
+
+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce,
+ ShenandoahLoadReferenceBarrierStub* stub) {
+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+ __ bind(*stub->entry());
+
+ DecoratorSet decorators = stub->decorators();
+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
+
+ Register obj = stub->obj()->as_register();
+ Register res = stub->result()->as_register();
+ Register addr = stub->addr()->as_pointer_register();
+ Register tmp1 = stub->tmp1()->as_register();
+ Register tmp2 = stub->tmp2()->as_register();
+
+ assert(res == x10, "result must arrive in x10");
+ assert_different_registers(tmp1, tmp2, t0);
+
+ if (res != obj) {
+ __ mv(res, obj);
+ }
+
+ if (is_strong) {
+ // Check for object in cset.
+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ __ add(tmp2, tmp2, tmp1);
+ __ lbu(tmp2, Address(tmp2));
+ __ beqz(tmp2, *stub->continuation(), true /* is_far */);
+ }
+
+ ce->store_parameter(res, 0);
+ ce->store_parameter(addr, 1);
+
+ if (is_strong) {
+ if (is_native) {
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
+ } else {
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
+ }
+ } else if (is_weak) {
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
+ } else {
+ assert(is_phantom, "only remaining strength");
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
+ }
+
+ __ j(*stub->continuation());
+}
+
+#undef __
+
+#define __ sasm->
+
+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
+ __ prologue("shenandoah_pre_barrier", false);
+
+ // arg0 : previous value of memory
+
+ BarrierSet* bs = BarrierSet::barrier_set();
+
+ const Register pre_val = x10;
+ const Register thread = xthread;
+ const Register tmp = t0;
+
+ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
+
+ Label done;
+ Label runtime;
+
+ // Is marking still active?
+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+ __ lb(tmp, gc_state);
+ __ andi(tmp, tmp, ShenandoahHeap::MARKING);
+ __ beqz(tmp, done);
+
+ // Can we store original value in the thread's buffer?
+ __ ld(tmp, queue_index);
+ __ beqz(tmp, runtime);
+
+ __ sub(tmp, tmp, wordSize);
+ __ sd(tmp, queue_index);
+ __ ld(t1, buffer);
+ __ add(tmp, tmp, t1);
+ __ load_parameter(0, t1);
+ __ sd(t1, Address(tmp, 0));
+ __ j(done);
+
+ __ bind(runtime);
+ __ push_call_clobbered_registers();
+ __ load_parameter(0, pre_val);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ pop_call_clobbered_registers();
+ __ bind(done);
+
+ __ epilogue();
+}
+
+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm,
+ DecoratorSet decorators) {
+ __ prologue("shenandoah_load_reference_barrier", false);
+ // arg0 : object to be resolved
+
+ __ push_call_clobbered_registers();
+ __ load_parameter(0, x10);
+ __ load_parameter(1, x11);
+
+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
+ if (is_strong) {
+ if (is_native) {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong);
+ } else {
+ if (UseCompressedOops) {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow);
+ } else {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong);
+ }
+ }
+ } else if (is_weak) {
+ assert(!is_native, "weak must not be called off-heap");
+ if (UseCompressedOops) {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow);
+ } else {
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak);
+ }
+ } else {
+ assert(is_phantom, "only remaining strength");
+ assert(is_native, "phantom must only be called off-heap");
+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom);
+ }
+ __ jalr(ra);
+ __ mv(t0, x10);
+ __ pop_call_clobbered_registers();
+ __ mv(x10, t0);
+
+ __ epilogue();
+}
+
+#undef __
+
+#endif // COMPILER1
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a705f49766777d5207d0695703375379b97c92c3
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#ifdef COMPILER1
+class LIR_Assembler;
+class ShenandoahPreBarrierStub;
+class ShenandoahLoadReferenceBarrierStub;
+class StubAssembler;
+#endif
+class StubCodeGenerator;
+
+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
+private:
+
+ void satb_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call);
+ void shenandoah_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call);
+
+ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
+ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators);
+
+public:
+
+ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp);
+
+#ifdef COMPILER1
+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators);
+#endif
+
+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+ Register src, Register dst, Register count, RegSet saved_regs);
+
+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Register dst, Address src, Register tmp1, Register tmp_thread);
+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Address dst, Register val, Register tmp1, Register tmp2);
+
+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+ Register obj, Register tmp, Label& slowpath);
+
+ void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result);
+};
+
+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad
new file mode 100644
index 0000000000000000000000000000000000000000..6c855f23c2af1b8bf850086fb9b670893c6aa145
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad
@@ -0,0 +1,285 @@
+//
+// Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+source_hpp %{
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+%}
+
+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+
+ format %{
+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+
+ format %{
+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ predicate(needs_acquiring_load_reserved(n));
+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+
+ format %{
+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+ predicate(needs_acquiring_load_reserved(n));
+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+
+ format %{
+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+
+ format %{
+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+ true /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+ true /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+ format %{
+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah"
+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+ predicate(needs_acquiring_load_reserved(n));
+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register);
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
+ true /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ predicate(needs_acquiring_load_reserved(n));
+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register);
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
+ true /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+ predicate(needs_acquiring_load_reserved(n));
+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+ format %{
+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah"
+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ predicate(needs_acquiring_load_reserved(n));
+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
+ ins_cost(10 * DEFAULT_COST);
+
+ effect(TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah"
+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode %{
+ Register tmp = $tmp$$Register;
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
+ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
+ false /* is_cae */, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3d3f4d4d7741b0f99c324a9c2b0b70e1f80d0170
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/codeBlob.hpp"
+#include "code/vmreg.inline.hpp"
+#include "gc/z/zBarrier.inline.hpp"
+#include "gc/z/zBarrierSet.hpp"
+#include "gc/z/zBarrierSetAssembler.hpp"
+#include "gc/z/zBarrierSetRuntime.hpp"
+#include "gc/z/zThreadLocalData.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#ifdef COMPILER1
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/z/c1/zBarrierSetC1.hpp"
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/z/c2/zBarrierSetC2.hpp"
+#endif // COMPILER2
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#undef __
+#define __ masm->
+
+void ZBarrierSetAssembler::load_at(MacroAssembler* masm,
+ DecoratorSet decorators,
+ BasicType type,
+ Register dst,
+ Address src,
+ Register tmp1,
+ Register tmp_thread) {
+ if (!ZBarrierSet::barrier_needed(decorators, type)) {
+ // Barrier not needed
+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+ return;
+ }
+
+ assert_different_registers(t1, src.base());
+ assert_different_registers(t0, t1, dst);
+
+ Label done;
+
+ // Load bad mask into temp register.
+ __ la(t0, src);
+ __ ld(t1, address_bad_mask_from_thread(xthread));
+ __ ld(dst, Address(t0));
+
+ // Test reference against bad mask. If mask bad, then we need to fix it up.
+ __ andr(t1, dst, t1);
+ __ beqz(t1, done);
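+ // Logically (sketch): if ((dst & bad_mask) != 0) dst = slow_path(dst, field_address);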
+
+ __ enter();
+
+ __ push_call_clobbered_registers_except(RegSet::of(dst));
+
+ if (c_rarg0 != dst) {
+ __ mv(c_rarg0, dst);
+ }
+
+ __ mv(c_rarg1, t0);
+
+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2);
+
+ // Make sure dst has the return value.
+ if (dst != x10) {
+ __ mv(dst, x10);
+ }
+
+ __ pop_call_clobbered_registers_except(RegSet::of(dst));
+ __ leave();
+
+ __ bind(done);
+}
+
+#ifdef ASSERT
+
+void ZBarrierSetAssembler::store_at(MacroAssembler* masm,
+ DecoratorSet decorators,
+ BasicType type,
+ Address dst,
+ Register val,
+ Register tmp1,
+ Register tmp2) {
+ // Verify value
+ if (is_reference_type(type)) {
+ // Note that val could be noreg, which means we
+ // are storing null and can skip verification.
+ if (val != noreg) {
+ Label done;
+
+ // tmp1 and tmp2 are often set to noreg.
+ RegSet savedRegs = RegSet::of(t0);
+ __ push_reg(savedRegs, sp);
+
+ __ ld(t0, address_bad_mask_from_thread(xthread));
+ __ andr(t0, val, t0);
+ __ beqz(t0, done);
+ __ stop("Verify oop store failed");
+ __ should_not_reach_here();
+ __ bind(done);
+ __ pop_reg(savedRegs, sp);
+ }
+ }
+
+ // Store value
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+}
+
+#endif // ASSERT
+
+void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm,
+ DecoratorSet decorators,
+ bool is_oop,
+ Register src,
+ Register dst,
+ Register count,
+ RegSet saved_regs) {
+ if (!is_oop) {
+ // Barrier not needed
+ return;
+ }
+
+ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {");
+
+ assert_different_registers(src, count, t0);
+
+ __ push_reg(saved_regs, sp);
+
+ if (count == c_rarg0 && src == c_rarg1) {
+ // exactly backwards!!
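+ // Swap c_rarg0 and c_rarg1 in place via the three-XOR trick (no scratch register needed).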
+ __ xorr(c_rarg0, c_rarg0, c_rarg1);
+ __ xorr(c_rarg1, c_rarg0, c_rarg1);
+ __ xorr(c_rarg0, c_rarg0, c_rarg1);
+ } else {
+ __ mv(c_rarg0, src);
+ __ mv(c_rarg1, count);
+ }
+
+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2);
+
+ __ pop_reg(saved_regs, sp);
+
+ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue");
+}
+
+void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm,
+ Register jni_env,
+ Register robj,
+ Register tmp,
+ Label& slowpath) {
+ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {");
+
+ assert_different_registers(jni_env, robj, tmp);
+
+ // Resolve jobject
+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath);
+
+ // Compute the offset of address bad mask from the field of jni_environment
+ long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) -
+ in_bytes(JavaThread::jni_environment_offset()));
+
+ // Load the address bad mask
+ __ ld(tmp, Address(jni_env, bad_mask_relative_offset));
+
+ // Check address bad mask
+ __ andr(tmp, robj, tmp);
+ __ bnez(tmp, slowpath);
+
+ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native");
+}
+
+#ifdef COMPILER2
+
+OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
+ if (!OptoReg::is_reg(opto_reg)) {
+ return OptoReg::Bad;
+ }
+
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (vm_reg->is_FloatRegister()) {
+ return opto_reg & ~1;
+ }
+
+ return opto_reg;
+}
+
+#undef __
+#define __ _masm->
+
+class ZSaveLiveRegisters {
+private:
+ MacroAssembler* const _masm;
+ RegSet _gp_regs;
+ FloatRegSet _fp_regs;
+ VectorRegSet _vp_regs;
+
+public:
+ void initialize(ZLoadBarrierStubC2* stub) {
+ // Record registers that need to be saved/restored
+ RegMaskIterator rmi(stub->live());
+ while (rmi.has_next()) {
+ const OptoReg::Name opto_reg = rmi.next();
+ if (OptoReg::is_reg(opto_reg)) {
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (vm_reg->is_Register()) {
+ _gp_regs += RegSet::of(vm_reg->as_Register());
+ } else if (vm_reg->is_FloatRegister()) {
+ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
+ } else if (vm_reg->is_VectorRegister()) {
+ const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1));
+ _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister());
+ } else {
+ fatal("Unknown register type");
+ }
+ }
+ }
+
+ // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated
+ _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref());
+ }
+
+ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
+ _masm(masm),
+ _gp_regs(),
+ _fp_regs(),
+ _vp_regs() {
+ // Figure out what registers to save/restore
+ initialize(stub);
+
+ // Save registers
+ __ push_reg(_gp_regs, sp);
+ __ push_fp(_fp_regs, sp);
+ __ push_vp(_vp_regs, sp);
+ }
+
+ ~ZSaveLiveRegisters() {
+ // Restore registers
+ __ pop_vp(_vp_regs, sp);
+ __ pop_fp(_fp_regs, sp);
+ __ pop_reg(_gp_regs, sp);
+ }
+};
+
+class ZSetupArguments {
+private:
+ MacroAssembler* const _masm;
+ const Register _ref;
+ const Address _ref_addr;
+
+public:
+ ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
+ _masm(masm),
+ _ref(stub->ref()),
+ _ref_addr(stub->ref_addr()) {
+
+ // Setup arguments
+ if (_ref_addr.base() == noreg) {
+ // No self healing
+ if (_ref != c_rarg0) {
+ __ mv(c_rarg0, _ref);
+ }
+ __ mv(c_rarg1, zr);
+ } else {
+ // Self healing
+ if (_ref == c_rarg0) {
+ // _ref is already at correct place
+ __ la(c_rarg1, _ref_addr);
+ } else if (_ref != c_rarg1) {
+ // _ref is in wrong place, but not in c_rarg1, so fix it first
+ __ la(c_rarg1, _ref_addr);
+ __ mv(c_rarg0, _ref);
+ } else if (_ref_addr.base() != c_rarg0) {
+ assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0");
+ __ mv(c_rarg0, _ref);
+ __ la(c_rarg1, _ref_addr);
+ } else {
+ assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0");
+ if (_ref_addr.base() == c_rarg0) {
+ __ mv(t1, c_rarg1);
+ __ la(c_rarg1, _ref_addr);
+ __ mv(c_rarg0, t1);
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+ }
+ }
+
+ ~ZSetupArguments() {
+ // Transfer result
+ if (_ref != x10) {
+ __ mv(_ref, x10);
+ }
+ }
+};
+
+#undef __
+#define __ masm->
+
+void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
+ BLOCK_COMMENT("ZLoadBarrierStubC2");
+
+ // Stub entry
+ __ bind(*stub->entry());
+
+ {
+ ZSaveLiveRegisters save_live_registers(masm, stub);
+ ZSetupArguments setup_arguments(masm, stub);
+ int32_t offset = 0;
+ __ la_patchable(t0, stub->slow_path(), offset);
+ __ jalr(x1, t0, offset);
+ }
+
+ // Stub exit
+ __ j(*stub->continuation());
+}
+
+#undef __
+
+#endif // COMPILER2
+
+#ifdef COMPILER1
+#undef __
+#define __ ce->masm()->
+
+void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce,
+ LIR_Opr ref) const {
+ assert_different_registers(xthread, ref->as_register(), t1);
+ __ ld(t1, address_bad_mask_from_thread(xthread));
+ __ andr(t1, t1, ref->as_register());
+}
+
+void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce,
+ ZLoadBarrierStubC1* stub) const {
+ // Stub entry
+ __ bind(*stub->entry());
+
+ Register ref = stub->ref()->as_register();
+ Register ref_addr = noreg;
+ Register tmp = noreg;
+
+ if (stub->tmp()->is_valid()) {
+ // Load address into tmp register
+ ce->leal(stub->ref_addr(), stub->tmp());
+ ref_addr = tmp = stub->tmp()->as_pointer_register();
+ } else {
+ // Address already in register
+ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register();
+ }
+
+ assert_different_registers(ref, ref_addr, noreg);
+
+ // Save x10 unless it is the result or tmp register
+ // Set up SP to accommodate parameters and maybe x10.
+ if (ref != x10 && tmp != x10) {
+ __ sub(sp, sp, 32);
+ __ sd(x10, Address(sp, 16));
+ } else {
+ __ sub(sp, sp, 16);
+ }
+
+ // Setup arguments and call runtime stub
+ ce->store_parameter(ref_addr, 1);
+ ce->store_parameter(ref, 0);
+
+ __ far_call(stub->runtime_stub());
+
+ // Verify result
+ __ verify_oop(x10, "Bad oop");
+
+ // Move result into place
+ if (ref != x10) {
+ __ mv(ref, x10);
+ }
+
+ // Restore x10 unless it is the result or tmp register
+ if (ref != x10 && tmp != x10) {
+ __ ld(x10, Address(sp, 16));
+ __ add(sp, sp, 32);
+ } else {
+ __ add(sp, sp, 16);
+ }
+
+ // Stub exit
+ __ j(*stub->continuation());
+}
+
+#undef __
+#define __ sasm->
+
+void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm,
+ DecoratorSet decorators) const {
+ __ prologue("zgc_load_barrier stub", false);
+
+ __ push_call_clobbered_registers_except(RegSet::of(x10));
+
+ // Setup arguments
+ __ load_parameter(0, c_rarg0);
+ __ load_parameter(1, c_rarg1);
+
+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2);
+
+ __ pop_call_clobbered_registers_except(RegSet::of(x10));
+
+ __ epilogue();
+}
+
+#undef __
+#endif // COMPILER1
diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..dc07ab635fed9d6077e0350d03f52453922ddbd6
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP
+
+#include "code/vmreg.hpp"
+#include "oops/accessDecorators.hpp"
+#ifdef COMPILER2
+#include "opto/optoreg.hpp"
+#endif // COMPILER2
+
+#ifdef COMPILER1
+class LIR_Assembler;
+class LIR_Opr;
+class StubAssembler;
+class ZLoadBarrierStubC1;
+#endif // COMPILER1
+
+#ifdef COMPILER2
+class Node;
+class ZLoadBarrierStubC2;
+#endif // COMPILER2
+
+class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
+public:
+ virtual void load_at(MacroAssembler* masm,
+ DecoratorSet decorators,
+ BasicType type,
+ Register dst,
+ Address src,
+ Register tmp1,
+ Register tmp_thread);
+
+#ifdef ASSERT
+ virtual void store_at(MacroAssembler* masm,
+ DecoratorSet decorators,
+ BasicType type,
+ Address dst,
+ Register val,
+ Register tmp1,
+ Register tmp2);
+#endif // ASSERT
+
+ virtual void arraycopy_prologue(MacroAssembler* masm,
+ DecoratorSet decorators,
+ bool is_oop,
+ Register src,
+ Register dst,
+ Register count,
+ RegSet saved_regs);
+
+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm,
+ Register jni_env,
+ Register robj,
+ Register tmp,
+ Label& slowpath);
+
+#ifdef COMPILER1
+ void generate_c1_load_barrier_test(LIR_Assembler* ce,
+ LIR_Opr ref) const;
+
+ void generate_c1_load_barrier_stub(LIR_Assembler* ce,
+ ZLoadBarrierStubC1* stub) const;
+
+ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm,
+ DecoratorSet decorators) const;
+#endif // COMPILER1
+
+#ifdef COMPILER2
+ OptoReg::Name refine_register(const Node* node,
+ OptoReg::Name opto_reg);
+
+ void generate_c2_load_barrier_stub(MacroAssembler* masm,
+ ZLoadBarrierStubC2* stub) const;
+#endif // COMPILER2
+};
+
+#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d14997790afaa108ccc80e28562d2e9d822be7b4
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/gcLogPrecious.hpp"
+#include "gc/shared/gc_globals.hpp"
+#include "gc/z/zGlobals.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/os.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
+
+#ifdef LINUX
+#include <sys/mman.h>
+#endif // LINUX
+
+//
+// The heap can have three different layouts, depending on the max heap size.
+//
+// Address Space & Pointer Layout 1
+// --------------------------------
+//
+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB)
+// . .
+// . .
+// . .
+// +--------------------------------+ 0x0000014000000000 (20TB)
+// | Remapped View |
+// +--------------------------------+ 0x0000010000000000 (16TB)
+// . .
+// +--------------------------------+ 0x00000c0000000000 (12TB)
+// | Marked1 View |
+// +--------------------------------+ 0x0000080000000000 (8TB)
+// | Marked0 View |
+// +--------------------------------+ 0x0000040000000000 (4TB)
+// . .
+// +--------------------------------+ 0x0000000000000000
+//
+// 6 4 4 4 4
+// 3 6 5 2 1 0
+// +--------------------+----+-----------------------------------------------+
+// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111|
+// +--------------------+----+-----------------------------------------------+
+// | | |
+// | | * 41-0 Object Offset (42-bits, 4TB address space)
+// | |
+// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB)
+// | 0010 = Marked1 (Address view 8-12TB)
+// | 0100 = Remapped (Address view 16-20TB)
+// | 1000 = Finalizable (Address view N/A)
+// |
+// * 63-46 Fixed (18-bits, always zero)
+//
+//
+// Address Space & Pointer Layout 2
+// --------------------------------
+//
+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB)
+// . .
+// . .
+// . .
+// +--------------------------------+ 0x0000280000000000 (40TB)
+// | Remapped View |
+// +--------------------------------+ 0x0000200000000000 (32TB)
+// . .
+// +--------------------------------+ 0x0000180000000000 (24TB)
+// | Marked1 View |
+// +--------------------------------+ 0x0000100000000000 (16TB)
+// | Marked0 View |
+// +--------------------------------+ 0x0000080000000000 (8TB)
+// . .
+// +--------------------------------+ 0x0000000000000000
+//
+// 6 4 4 4 4
+// 3 7 6 3 2 0
+// +------------------+-----+------------------------------------------------+
+// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111|
+// +-------------------+----+------------------------------------------------+
+// | | |
+// | | * 42-0 Object Offset (43-bits, 8TB address space)
+// | |
+// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB)
+// | 0010 = Marked1 (Address view 16-24TB)
+// | 0100 = Remapped (Address view 32-40TB)
+// | 1000 = Finalizable (Address view N/A)
+// |
+// * 63-47 Fixed (17-bits, always zero)
+//
+//
+// Address Space & Pointer Layout 3
+// --------------------------------
+//
+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB)
+// . .
+// . .
+// . .
+// +--------------------------------+ 0x0000500000000000 (80TB)
+// | Remapped View |
+// +--------------------------------+ 0x0000400000000000 (64TB)
+// . .
+// +--------------------------------+ 0x0000300000000000 (48TB)
+// | Marked1 View |
+// +--------------------------------+ 0x0000200000000000 (32TB)
+// | Marked0 View |
+// +--------------------------------+ 0x0000100000000000 (16TB)
+// . .
+// +--------------------------------+ 0x0000000000000000
+//
+// 6 4 4 4 4
+// 3 8 7 4 3 0
+// +------------------+----+-------------------------------------------------+
+// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111|
+// +------------------+----+-------------------------------------------------+
+// | | |
+// | | * 43-0 Object Offset (44-bits, 16TB address space)
+// | |
+// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB)
+// | 0010 = Marked1 (Address view 32-48TB)
+// | 0100 = Remapped (Address view 64-80TB)
+// | 1000 = Finalizable (Address view N/A)
+// |
+// * 63-48 Fixed (16-bits, always zero)
+//
+
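+// Illustration (a sketch, not shared ZGC code): under layout 1 above a colored
+// pointer decomposes as
+//
+//   offset   = addr & ((1ull << 42) - 1);   // bits 41-0, object offset
+//   metadata = (addr >> 42) & 0xf;          // bits 45-42, color bits
+//
+// where metadata 0b0001/0b0010/0b0100 selects the Marked0/Marked1/Remapped
+// view; the offset width and metadata shift grow by one bit per layout.
+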
+// Default value if probing is not implemented for a certain platform: 128TB
+static const size_t DEFAULT_MAX_ADDRESS_BIT = 47;
+// Minimum value returned, if probing fails: 64GB
+static const size_t MINIMUM_MAX_ADDRESS_BIT = 36;
+
+static size_t probe_valid_max_address_bit() {
+#ifdef LINUX
+ size_t max_address_bit = 0;
+ const size_t page_size = os::vm_page_size();
+ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) {
+ const uintptr_t base_addr = ((uintptr_t) 1U) << i;
+ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) {
+ // msync succeeded, the address is valid, and maybe even already mapped.
+ max_address_bit = i;
+ break;
+ }
+ if (errno != ENOMEM) {
+ // Some error occurred. This should never happen, but msync
+ // has some undefined behavior, hence ignore this bit.
+#ifdef ASSERT
+ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno));
+#else // ASSERT
+ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno));
+#endif // ASSERT
+ continue;
+ }
+ // Since msync failed with ENOMEM, the page might not be mapped.
+ // Try to map it, to see if the address is valid.
+ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);
+ if (result_addr != MAP_FAILED) {
+ munmap(result_addr, page_size);
+ }
+ if ((uintptr_t) result_addr == base_addr) {
+ // address is valid
+ max_address_bit = i;
+ break;
+ }
+ }
+ if (max_address_bit == 0) {
+ // probing failed, allocate a very high page and take that bit as the maximum
+ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT;
+ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);
+ if (result_addr != MAP_FAILED) {
+ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1;
+ munmap(result_addr, page_size);
+ }
+ }
+ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit);
+ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT);
+#else // LINUX
+ return DEFAULT_MAX_ADDRESS_BIT;
+#endif // LINUX
+}
+
+size_t ZPlatformAddressOffsetBits() {
+ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
+ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
+ const size_t min_address_offset_bits = max_address_offset_bits - 2;
+ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
+ const size_t address_offset_bits = log2i_exact(address_offset);
+ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
+}
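+
+// Worked example (sketch): if probe_valid_max_address_bit() returns 47, then
+// valid_max_address_offset_bits == 48, max_address_offset_bits == 45 and
+// min_address_offset_bits == 43. A heap whose virtual reservation
+// (MaxHeapSize * ZVirtualToPhysicalRatio) rounds up to 16TB gives
+// log2i_exact(2^44) == 44, which lies inside [43, 45] and is used directly;
+// smaller or larger reservations are clamped to that range.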
+
+size_t ZPlatformAddressMetadataShift() {
+ return ZPlatformAddressOffsetBits();
+}
diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f20ecd9b073c02b5a2da76d9cf5b0c1faf765dde
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP
+#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP
+
+const size_t ZPlatformGranuleSizeShift = 21; // 2MB
+const size_t ZPlatformHeapViews = 3;
+const size_t ZPlatformCacheLineSize = 64;
+
+size_t ZPlatformAddressOffsetBits();
+size_t ZPlatformAddressMetadataShift();
+
+#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad
new file mode 100644
index 0000000000000000000000000000000000000000..6b6f87814a56ef1b1d11264c22dd7b8194a1fa28
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad
@@ -0,0 +1,233 @@
+//
+// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/shared/gc_globals.hpp"
+#include "gc/z/c2/zBarrierSetC2.hpp"
+#include "gc/z/zThreadLocalData.hpp"
+
+%}
+
+source %{
+
+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) {
+ if (barrier_data == ZLoadBarrierElided) {
+ return;
+ }
+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data);
+ __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset()));
+ __ andr(tmp, tmp, ref);
+ __ bnez(tmp, *stub->entry(), true /* far */);
+ __ bind(*stub->continuation());
+}
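+
+// In pseudo code, the fast path emitted above is roughly (a sketch, not the
+// shared barrier code):
+//
+//   if ((ref & thread->address_bad_mask) != 0) {
+//     ref = load_barrier_slow_stub(ref, ref_addr);  // out-of-line ZLoadBarrierStubC2
+//   }
+//   // fall through: ref now carries a good color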
+
+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong);
+ __ j(*stub->entry());
+ __ bind(*stub->continuation());
+}
+
+%}
+
+// Load Pointer
+instruct zLoadP(iRegPNoSp dst, memory mem)
+%{
+ match(Set dst (LoadP mem));
+ predicate(UseZGC && (n->as_Load()->barrier_data() != 0));
+ effect(TEMP dst);
+
+ ins_cost(4 * DEFAULT_COST);
+
+ format %{ "ld $dst, $mem, #@zLoadP" %}
+
+ ins_encode %{
+ const Address ref_addr (as_Register($mem$$base), $mem$$disp);
+ __ ld($dst$$Register, ref_addr);
+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data());
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+ effect(KILL cr, TEMP_DEF res);
+
+ ins_cost(2 * VOLATILE_REF_COST);
+
+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t"
+ "mv $res, $res == $oldval" %}
+
+ ins_encode %{
+ Label failed;
+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result_as_bool */);
+ __ beqz($res$$Register, failed);
+ __ mv(t0, $oldval$$Register);
+ __ bind(failed);
+ if (barrier_data() != ZLoadBarrierElided) {
+ Label good;
+ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */);
+ __ andr(t1, t1, t0);
+ __ beqz(t1, good);
+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result_as_bool */);
+ __ bind(good);
+ }
+ %}
+
+ ins_pipe(pipe_slow);
+%}
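+
+// Note on the shape above (a sketch of the intent, not authoritative): after the
+// CAS, t0 is expected to hold the pointer that was compared against -- oldval is
+// copied there explicitly on success, and the loaded memory value is assumed to
+// remain there on failure. If that pointer carries a bad color, the slow path
+// heals the reference and the CAS is retried, so a stale colored pointer does
+// not produce a spurious failure.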
+
+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong));
+ effect(KILL cr, TEMP_DEF res);
+
+ ins_cost(2 * VOLATILE_REF_COST);
+
+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t"
+ "mv $res, $res == $oldval" %}
+
+ ins_encode %{
+ Label failed;
+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result_as_bool */);
+ __ beqz($res$$Register, failed);
+ __ mv(t0, $oldval$$Register);
+ __ bind(failed);
+ if (barrier_data() != ZLoadBarrierElided) {
+ Label good;
+ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */);
+ __ andr(t1, t1, t0);
+ __ beqz(t1, good);
+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result_as_bool */);
+ __ bind(good);
+ }
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+ effect(TEMP_DEF res);
+
+ ins_cost(2 * VOLATILE_REF_COST);
+
+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %}
+
+ ins_encode %{
+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register);
+ if (barrier_data() != ZLoadBarrierElided) {
+ Label good;
+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset()));
+ __ andr(t0, t0, $res$$Register);
+ __ beqz(t0, good);
+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register);
+ __ bind(good);
+ }
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
+ effect(TEMP_DEF res);
+
+ ins_cost(2 * VOLATILE_REF_COST);
+
+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %}
+
+ ins_encode %{
+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register);
+ if (barrier_data() != ZLoadBarrierElided) {
+ Label good;
+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset()));
+ __ andr(t0, t0, $res$$Register);
+ __ beqz(t0, good);
+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register);
+ __ bind(good);
+ }
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
+ match(Set prev (GetAndSetP mem newv));
+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ effect(TEMP_DEF prev, KILL cr);
+
+ ins_cost(2 * VOLATILE_REF_COST);
+
+ format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %}
+
+ ins_encode %{
+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data());
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
+ match(Set prev (GetAndSetP mem newv));
+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0));
+ effect(TEMP_DEF prev, KILL cr);
+
+ ins_cost(VOLATILE_REF_COST);
+
+ format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %}
+
+ ins_encode %{
+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data());
+ %}
+ ins_pipe(pipe_serial);
+%}
diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2936837d95183b3ca1192afa6913279312acc0ac
--- /dev/null
+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
+
+const int StackAlignmentInBytes = 16;
+
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are extended to 64 bits.
+const bool CCallingConventionRequiresIntsAsLongs = false;
+
+// RISCV has adopted a multicopy atomic model closely following
+// that of ARMv8.
+#define CPU_MULTI_COPY_ATOMIC
+
+// To be safe, we deoptimize when we come across an access that needs
+// patching. This is similar to what is done on aarch64.
+#define DEOPTIMIZE_WHEN_PATCHING
+
+#define SUPPORTS_NATIVE_CX8
+
+#define SUPPORT_RESERVED_STACK_AREA
+
+#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false
+
+#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY
+
+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..30e3a8779b88cb07382db14d932983118285fb11
--- /dev/null
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP
+#define CPU_RISCV_GLOBALS_RISCV_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
+define_pd_global(bool, TrapBasedNullChecks, false);
+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast
+
+define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment.
+define_pd_global(intx, CodeEntryAlignment, 64);
+define_pd_global(intx, OptoLoopAlignment, 16);
+
+#define DEFAULT_STACK_YELLOW_PAGES (2)
+#define DEFAULT_STACK_RED_PAGES (1)
+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the
+// stack if compiled for unix and LP64. To pass stack overflow tests we need
+// 20 shadow pages.
+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5))
+#define DEFAULT_STACK_RESERVED_PAGES (1)
+
+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
+#define MIN_STACK_RESERVED_PAGES (0)
+
+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES);
+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES);
+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES);
+
+define_pd_global(bool, RewriteBytecodes, true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+define_pd_global(bool, PreserveFramePointer, false);
+
+define_pd_global(uintx, TypeProfileLevel, 111);
+
+define_pd_global(bool, CompactStrings, true);
+
+// Clear short arrays bigger than one word in an arch-specific way
+define_pd_global(intx, InitArrayShortSize, BytesPerLong);
+
+define_pd_global(intx, InlineSmallCode, 1000);
+
+#define ARCH_FLAGS(develop, \
+ product, \
+ notproduct, \
+ range, \
+ constraint) \
+ \
+ product(bool, NearCpool, true, \
+ "constant pool is close to instructions") \
+ product(intx, BlockZeroingLowLimit, 256, \
+ "Minimum size in bytes when block zeroing will be used") \
+ range(1, max_jint) \
+ product(bool, TraceTraps, false, "Trace all traps the signal handler") \
+ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \
+ product(bool, UseConservativeFence, true, \
+ "Extend i for r and o for w in the pred/succ flags of fence;" \
+ "Extend fence.i to fence.i + fence.") \
+ product(bool, AvoidUnalignedAccesses, true, \
+ "Avoid generating unaligned memory accesses") \
+ product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \
+ product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \
+ product(bool, UseZba, false, EXPERIMENTAL, "Use Zba instructions") \
+ product(bool, UseZbb, false, EXPERIMENTAL, "Use Zbb instructions") \
+ product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
+ "Use RVV instructions for left/right shift of BigInteger")
+
+#endif // CPU_RISCV_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cc93103dc556785d3761393aea17f5fce5516cad
--- /dev/null
+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/oop.inline.hpp"
+
+int InlineCacheBuffer::ic_stub_code_size() {
+ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size)
+ // 5: auipc + ld + j + address(2 * instruction_size)
+ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size;
+}
+
+#define __ masm->
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
+ assert_cond(code_begin != NULL && entry_point != NULL);
+ ResourceMark rm;
+ CodeBuffer code(code_begin, ic_stub_code_size());
+ MacroAssembler* masm = new MacroAssembler(&code);
+ // Note: even though the code contains an embedded value, we do not need reloc info
+ // because
+ // (1) the value is old (i.e., doesn't matter for scavenges)
+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
+
+ address start = __ pc();
+ Label l;
+ __ ld(t1, l);
+ __ far_jump(ExternalAddress(entry_point));
+ __ align(wordSize);
+ __ bind(l);
+ __ emit_int64((intptr_t)cached_value);
+ // Only need to invalidate the 1st two instructions - not the whole ic stub
+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
+ assert(__ pc() - start == ic_stub_code_size(), "must be");
+}
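+
+// Resulting stub layout (a sketch derived from the code above, assuming
+// far_branches(), i.e. auipc+ld for the load and auipc+jalr for the jump):
+//
+//   [auipc+ld t1, <cached_value>] [auipc+jalr -> entry_point] [8-byte cached_value]
+//
+// which matches the 6-instruction-slot size returned by ic_stub_code_size().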
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object
+ NativeJump* jump = nativeJump_at(move->next_instruction_address());
+ return jump->jump_destination();
+}
+
+
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
+ // The word containing the cached value is at the end of this IC buffer
+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize);
+ void* o = (void*)*p;
+ return o;
+}
diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..922a80f9f3e0050288387c66b40b7e1543b88e0b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "runtime/icache.hpp"
+
+#define __ _masm->
+
+static int icache_flush(address addr, int lines, int magic) {
+ os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size)));
+ return magic;
+}
+
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
+ address start = (address)icache_flush;
+ *flush_icache_stub = (ICache::flush_icache_stub_t)start;
+
+ // ICache::invalidate_range() contains explicit condition that the first
+ // call is invoked on the generated icache flush stub code range.
+ ICache::invalidate_range(start, 0);
+
+ {
+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush");
+ __ ret();
+ }
+}
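+
+// The "stub" installed above is simply the C function icache_flush(): the shared
+// ICache code is expected to call it as flush_icache_stub(addr, lines, magic) and
+// to check that the magic value is returned unchanged (a sketch of the contract;
+// see AbstractICache for the authoritative interface).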
+
+#undef __
diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5bf40ca820485a482e132e6b4d3cb12ce8ac2c8d
--- /dev/null
+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_ICACHE_RISCV_HPP
+#define CPU_RISCV_ICACHE_RISCV_HPP
+
+// Interface for updating the instruction cache. Whenever the VM
+// modifies code, part of the processor instruction cache potentially
+// has to be flushed.
+
+class ICache : public AbstractICache {
+public:
+ enum {
+ stub_size = 16, // Size of the icache flush stub in bytes
+ line_size = BytesPerWord, // conservative
+ log2_line_size = LogBytesPerWord // log2(line_size)
+ };
+};
+
+#endif // CPU_RISCV_ICACHE_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d12dcb2af1957a6aac88c6541841018fdbd0e65f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -0,0 +1,1940 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interp_masm_riscv.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "logging/log.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markWord.hpp"
+#include "oops/method.hpp"
+#include "oops/methodData.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/safepointMechanism.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.inline.hpp"
+#include "utilities/powerOfTwo.hpp"
+
+void InterpreterMacroAssembler::narrow(Register result) {
+ // Get method->_constMethod->_result_type
+ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize));
+ ld(t0, Address(t0, Method::const_offset()));
+ lbu(t0, Address(t0, ConstMethod::result_type_offset()));
+
+ Label done, notBool, notByte, notChar;
+
+ // common case first
+ mv(t1, T_INT);
+ beq(t0, t1, done);
+
+ // mask integer result to narrower return type.
+ mv(t1, T_BOOLEAN);
+ bne(t0, t1, notBool);
+
+ andi(result, result, 0x1);
+ j(done);
+
+ bind(notBool);
+ mv(t1, T_BYTE);
+ bne(t0, t1, notByte);
+ sign_extend(result, result, 8);
+ j(done);
+
+ bind(notByte);
+ mv(t1, T_CHAR);
+ bne(t0, t1, notChar);
+ zero_extend(result, result, 16);
+ j(done);
+
+ bind(notChar);
+ sign_extend(result, result, 16);
+
+ // Nothing to do for T_INT
+ bind(done);
+ addw(result, result, zr);
+}
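+
+// Example (sketch): for a method declared to return boolean whose raw int result
+// is 0x00000102, the T_BOOLEAN branch above keeps only bit 0 (giving 0), whereas
+// a byte-returning method would sign-extend from 8 bits and get 0x02. The final
+// addw(result, result, zr) re-canonicalizes the value as a sign-extended 32-bit int.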
+
+void InterpreterMacroAssembler::jump_to_entry(address entry) {
+ assert(entry != NULL, "Entry must have been generated by now");
+ j(entry);
+}
+
+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
+ if (JvmtiExport::can_pop_frame()) {
+ Label L;
+ // Initiate popframe handling only if it is not already being
+ // processed. If the flag has the popframe_processing bit set,
+ // it means that this code is called *during* popframe handling - we
+ // don't want to reenter.
+ // This method is only called just after the call into the vm in
+ // call_VM_base, so the arg registers are available.
+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset()));
+ andi(t0, t1, JavaThread::popframe_pending_bit);
+ beqz(t0, L);
+ andi(t0, t1, JavaThread::popframe_processing_bit);
+ bnez(t0, L);
+ // Call Interpreter::remove_activation_preserving_args_entry() to get the
+ // address of the same-named entrypoint in the generated interpreter code.
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+ jr(x10);
+ bind(L);
+ }
+}
+
+
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
+ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset());
+ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset());
+ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset());
+ switch (state) {
+ case atos:
+ ld(x10, oop_addr);
+ sd(zr, oop_addr);
+ verify_oop(x10);
+ break;
+ case ltos:
+ ld(x10, val_addr);
+ break;
+ case btos: // fall through
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: // fall through
+ case itos:
+ lwu(x10, val_addr);
+ break;
+ case ftos:
+ flw(f10, val_addr);
+ break;
+ case dtos:
+ fld(f10, val_addr);
+ break;
+ case vtos:
+ /* nothing to do */
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ // Clean up tos value in the thread object
+ mvw(t0, (int) ilgl);
+ sw(t0, tos_addr);
+ sw(zr, val_addr);
+}
+
+
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) {
+ if (JvmtiExport::can_force_early_return()) {
+ Label L;
+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit
+
+ // Initiate earlyret handling only if it is not already being processed.
+ // If the flag has the earlyret_processing bit set, it means that this code
+ // is called *during* earlyret handling - we don't want to reenter.
+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset()));
+ mv(t1, JvmtiThreadState::earlyret_pending);
+ bne(t0, t1, L);
+
+ // Call Interpreter::remove_activation_early_entry() to get the address of the
+ // same-named entrypoint in the generated interpreter code.
+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset()));
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0);
+ jr(x10);
+ bind(L);
+ }
+}
+
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) {
+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
+ lhu(reg, Address(xbcp, bcp_offset));
+ revb_h(reg, reg);
+}
+
+void InterpreterMacroAssembler::get_dispatch() {
+ int32_t offset = 0;
+ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset);
+ addi(xdispatch, xdispatch, offset);
+}
+
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
+ int bcp_offset,
+ size_t index_size) {
+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+ if (index_size == sizeof(u2)) {
+ load_unsigned_short(index, Address(xbcp, bcp_offset));
+ } else if (index_size == sizeof(u4)) {
+ lwu(index, Address(xbcp, bcp_offset));
+ // Check if the secondary index definition is still ~x, otherwise
+ // we have to change the following assembler code to calculate the
+ // plain index.
+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
+ xori(index, index, -1);
+ addw(index, index, zr);
+ } else if (index_size == sizeof(u1)) {
+ load_unsigned_byte(index, Address(xbcp, bcp_offset));
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+// Return
+// Rindex: index into constant pool
+// Rcache: address of cache entry - ConstantPoolCache::base_offset()
+//
+// A caller must add ConstantPoolCache::base_offset() to Rcache to get
+// the true address of the cache entry.
+//
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
+ Register index,
+ int bcp_offset,
+ size_t index_size) {
+ assert_different_registers(cache, index);
+ assert_different_registers(cache, xcpool);
+ get_cache_index_at_bcp(index, bcp_offset, index_size);
+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+ // Convert from field index to ConstantPoolCacheEntry
+ // riscv already has the cache in xcpool so there is no need to
+ // install it in cache. Instead we pre-add the indexed offset to
+ // xcpool and return it in cache. All clients of this method need to
+ // be modified accordingly.
+ shadd(cache, index, xcpool, cache, 5);
+}
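+
+// The shift of 5 above relies on sizeof(ConstantPoolCacheEntry) being
+// 4 * wordSize == 32 bytes (checked by the assert), so the computed value is
+// effectively cache = xcpool + (index << 5), i.e. the address of the selected
+// entry minus ConstantPoolCache::base_offset(), as described in the comment above.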
+
+
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+ Register index,
+ Register bytecode,
+ int byte_no,
+ int bcp_offset,
+ size_t index_size) {
+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+ // We use a 32-bit load here since the layout of 64-bit words on
+ // little-endian machines allows us to do so.
+ // n.b. unlike x86 cache already includes the index offset
+ la(bytecode, Address(cache,
+ ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::indices_offset()));
+ membar(MacroAssembler::AnyAny);
+ lwu(bytecode, bytecode);
+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ const int shift_count = (1 + byte_no) * BitsPerByte;
+ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte));
+ srli(bytecode, bytecode, XLEN - BitsPerByte);
+}
+
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
+ Register tmp,
+ int bcp_offset,
+ size_t index_size) {
+ assert(cache != tmp, "must use different register");
+ get_cache_index_at_bcp(tmp, bcp_offset, index_size);
+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+ // Convert from field index to ConstantPoolCacheEntry index
+ // and from word offset to byte offset
+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord,
+ "else change next line");
+ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize));
+ // skip past the header
+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
+ // construct pointer to cache entry
+ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord);
+}
+
+// Load object from cpool->resolved_references(index)
+void InterpreterMacroAssembler::load_resolved_reference_at_index(
+ Register result, Register index, Register tmp) {
+ assert_different_registers(result, index);
+
+ get_constant_pool(result);
+ // Load pointer for resolved_references[] objArray
+ ld(result, Address(result, ConstantPool::cache_offset_in_bytes()));
+ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
+ resolve_oop_handle(result, tmp);
+ // Add in the index
+ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+ shadd(result, index, result, index, LogBytesPerHeapOop);
+ load_heap_oop(result, Address(result, 0));
+}
+
+void InterpreterMacroAssembler::load_resolved_klass_at_offset(
+ Register cpool, Register index, Register klass, Register temp) {
+ shadd(temp, index, cpool, temp, LogBytesPerWord);
+ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index
+ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses
+ shadd(klass, temp, klass, temp, LogBytesPerWord);
+ ld(klass, Address(klass, Array<Klass*>::base_offset_in_bytes()));
+}
+
+void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no,
+ Register method,
+ Register cache) {
+ const int method_offset = in_bytes(
+ ConstantPoolCache::base_offset() +
+ ((byte_no == TemplateTable::f2_byte)
+ ? ConstantPoolCacheEntry::f2_offset()
+ : ConstantPoolCacheEntry::f1_offset()));
+
+ ld(method, Address(cache, method_offset)); // get f1 Method*
+}
+
+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a
+// subtype of super_klass.
+//
+// Args:
+// x10: superklass
+// Rsub_klass: subklass
+//
+// Kills:
+// x12, x15
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+ Label& ok_is_subtype) {
+ assert(Rsub_klass != x10, "x10 holds superklass");
+ assert(Rsub_klass != x12, "x12 holds 2ndary super array length");
+ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr");
+
+ // Profile the not-null value's klass.
+ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15
+
+ // Do the check.
+ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12
+
+ // Profile the failure of the check.
+ profile_typecheck_failed(x12); // blows x12
+}
+
+// Java Expression Stack
+
+void InterpreterMacroAssembler::pop_ptr(Register r) {
+ ld(r, Address(esp, 0));
+ addi(esp, esp, wordSize);
+}
+
+void InterpreterMacroAssembler::pop_i(Register r) {
+ lw(r, Address(esp, 0)); // lw do signed extended
+ addi(esp, esp, wordSize);
+}
+
+void InterpreterMacroAssembler::pop_l(Register r) {
+ ld(r, Address(esp, 0));
+ addi(esp, esp, 2 * Interpreter::stackElementSize);
+}
+
+void InterpreterMacroAssembler::push_ptr(Register r) {
+ addi(esp, esp, -wordSize);
+ sd(r, Address(esp, 0));
+}
+
+void InterpreterMacroAssembler::push_i(Register r) {
+ addi(esp, esp, -wordSize);
+ addw(r, r, zr); // signed extended
+ sd(r, Address(esp, 0));
+}
+
+void InterpreterMacroAssembler::push_l(Register r) {
+ addi(esp, esp, -2 * wordSize);
+ sd(zr, Address(esp, wordSize));
+ sd(r, Address(esp));
+}
+
+void InterpreterMacroAssembler::pop_f(FloatRegister r) {
+ flw(r, esp, 0);
+ addi(esp, esp, wordSize);
+}
+
+void InterpreterMacroAssembler::pop_d(FloatRegister r) {
+ fld(r, esp, 0);
+ addi(esp, esp, 2 * Interpreter::stackElementSize);
+}
+
+void InterpreterMacroAssembler::push_f(FloatRegister r) {
+ addi(esp, esp, -wordSize);
+ fsw(r, Address(esp, 0));
+}
+
+void InterpreterMacroAssembler::push_d(FloatRegister r) {
+ addi(esp, esp, -2 * wordSize);
+ fsd(r, Address(esp, 0));
+}
+
+void InterpreterMacroAssembler::pop(TosState state) {
+ switch (state) {
+ case atos:
+ pop_ptr();
+ verify_oop(x10);
+ break;
+ case btos: // fall through
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: // fall through
+ case itos:
+ pop_i();
+ break;
+ case ltos:
+ pop_l();
+ break;
+ case ftos:
+ pop_f();
+ break;
+ case dtos:
+ pop_d();
+ break;
+ case vtos:
+ /* nothing to do */
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void InterpreterMacroAssembler::push(TosState state) {
+ switch (state) {
+ case atos:
+ verify_oop(x10);
+ push_ptr();
+ break;
+ case btos: // fall through
+ case ztos: // fall through
+ case ctos: // fall through
+ case stos: // fall through
+ case itos:
+ push_i();
+ break;
+ case ltos:
+ push_l();
+ break;
+ case ftos:
+ push_f();
+ break;
+ case dtos:
+ push_d();
+ break;
+ case vtos:
+ /* nothing to do */
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+// Helpers for swap and dup
+void InterpreterMacroAssembler::load_ptr(int n, Register val) {
+ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::store_ptr(int n, Register val) {
+ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::load_float(Address src) {
+ flw(f10, src);
+}
+
+void InterpreterMacroAssembler::load_double(Address src) {
+ fld(f10, src);
+}
+
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
+ // set sender sp
+ mv(x30, sp);
+ // record last_sp
+ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+}
+
+// Jump to from_interpreted entry of a call unless single stepping is possible
+// in this thread in which case we must call the i2i entry
+void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
+ prepare_to_jump_from_interpreted();
+ if (JvmtiExport::can_post_interpreter_events()) {
+ Label run_compiled_code;
+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+ // compiled code in threads for which the event is enabled. Check here for
+ // interp_only_mode if these events CAN be enabled.
+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
+ beqz(t0, run_compiled_code);
+ ld(t0, Address(method, Method::interpreter_entry_offset()));
+ jr(t0);
+ bind(run_compiled_code);
+ }
+
+ ld(t0, Address(method, Method::from_interpreted_offset()));
+ jr(t0);
+}
+
+// The following two routines provide a hook so that an implementation
+// can schedule the dispatch in two parts. amd64 does not do this.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
+}
+
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
+ dispatch_next(state, step);
+}
+
+void InterpreterMacroAssembler::dispatch_base(TosState state,
+ address* table,
+ bool verifyoop,
+ bool generate_poll,
+ Register Rs) {
+ // Note the argument Rs, which defaults to t0 and holds the dispatch index (the current bytecode).
+ if (VerifyActivationFrameSize) {
+ Unimplemented();
+ }
+ if (verifyoop && state == atos) {
+ verify_oop(x10);
+ }
+
+ Label safepoint;
+ address* const safepoint_table = Interpreter::safept_table(state);
+ bool needs_thread_local_poll = generate_poll && table != safepoint_table;
+
+ if (needs_thread_local_poll) {
+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
+ ld(t1, Address(xthread, JavaThread::polling_word_offset()));
+ andi(t1, t1, SafepointMechanism::poll_bit());
+ bnez(t1, safepoint);
+ }
+ if (table == Interpreter::dispatch_table(state)) {
+ li(t1, Interpreter::distance_from_dispatch_table(state));
+ add(t1, Rs, t1);
+ shadd(t1, t1, xdispatch, t1, 3);
+ } else {
+ mv(t1, (address)table);
+ shadd(t1, Rs, t1, Rs, 3);
+ }
+ ld(t1, Address(t1));
+ jr(t1);
+
+ if (needs_thread_local_poll) {
+ bind(safepoint);
+ la(t1, ExternalAddress((address)safepoint_table));
+ shadd(t1, Rs, t1, Rs, 3);
+ ld(t1, Address(t1));
+ jr(t1);
+ }
+}
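+
+// In pseudo code the non-safepoint path above performs (a sketch):
+//
+//   target = table[Rs];   // Rs holds the bytecode, entries are 8 bytes wide
+//   goto *target;
+//
+// where, for the currently active dispatch table, the slot address is formed
+// relative to xdispatch as xdispatch + (Rs + distance_from_dispatch_table(state)) * 8.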
+
+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) {
+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs);
+}
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) {
+ dispatch_base(state, Interpreter::normal_table(state), Rs);
+}
+
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) {
+ dispatch_base(state, Interpreter::normal_table(state), false, Rs);
+}
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
+ // load next bytecode
+ load_unsigned_byte(t0, Address(xbcp, step));
+ add(xbcp, xbcp, step);
+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
+}
+
+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
+ // load current bytecode
+ lbu(t0, Address(xbcp, 0));
+ dispatch_base(state, table);
+}
+
+// remove activation
+//
+// Apply stack watermark barrier.
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+// If throw_monitor_exception
+// throws IllegalMonitorStateException
+// Else if install_monitor_exception
+// installs IllegalMonitorStateException
+// Else
+// no error processing
+void InterpreterMacroAssembler::remove_activation(
+ TosState state,
+ bool throw_monitor_exception,
+ bool install_monitor_exception,
+ bool notify_jvmdi) {
+ // Note: Registers x13 may be in use for the
+ // result check if synchronized method
+ Label unlocked, unlock, no_unlock;
+
+ // The poll below is for the stack watermark barrier. It allows lazily fixing up
+ // frames that would normally not be safe to use. Such returns into unsafe
+ // territory of the stack will call InterpreterRuntime::at_unwind.
+ Label slow_path;
+ Label fast_path;
+ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */);
+ j(fast_path);
+
+ bind(slow_path);
+ push(state);
+ set_last_Java_frame(esp, fp, (address)pc(), t0);
+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread);
+ reset_last_Java_frame(true);
+ pop(state);
+
+ bind(fast_path);
+
+ // get the value of _do_not_unlock_if_synchronized into x13
+ const Address do_not_unlock_if_synchronized(xthread,
+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+ lbu(x13, do_not_unlock_if_synchronized);
+ sb(zr, do_not_unlock_if_synchronized); // reset the flag
+
+ // get method access flags
+ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize));
+ ld(x12, Address(x11, Method::access_flags_offset()));
+ andi(t0, x12, JVM_ACC_SYNCHRONIZED);
+ beqz(t0, unlocked);
+
+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag
+ // is set.
+ bnez(x13, no_unlock);
+
+ // unlock monitor
+ push(state); // save result
+
+ // BasicObjectLock will be first in list, since this is a
+ // synchronized method. However, need to check that the object has
+ // not been unlocked by an explicit monitorexit bytecode.
+ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset *
+ wordSize - (int) sizeof(BasicObjectLock));
+ // We use c_rarg1 so that if we go slow path it will be the correct
+ // register for unlock_object to pass to VM directly
+ la(c_rarg1, monitor); // address of first monitor
+
+ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+ bnez(x10, unlock);
+
+ pop(state);
+ if (throw_monitor_exception) {
+ // Entry already unlocked, need to throw exception
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_illegal_monitor_state_exception));
+ should_not_reach_here();
+ } else {
+ // Monitor already unlocked during a stack unroll. If requested,
+ // install an illegal_monitor_state_exception. Continue with
+ // stack unrolling.
+ if (install_monitor_exception) {
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::new_illegal_monitor_state_exception));
+ }
+ j(unlocked);
+ }
+
+ bind(unlock);
+ unlock_object(c_rarg1);
+ pop(state);
+
+ // Check for block-structured locking (i.e., that all locked
+ // objects have been unlocked)
+ bind(unlocked);
+
+ // x10: Might contain return value
+
+ // Check that all monitors are unlocked
+ {
+ Label loop, exception, entry, restart;
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+ const Address monitor_block_top(
+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+ const Address monitor_block_bot(
+ fp, frame::interpreter_frame_initial_sp_offset * wordSize);
+
+ bind(restart);
+ // We use c_rarg1 so that if we go slow path it will be the correct
+ // register for unlock_object to pass to VM directly
+ ld(c_rarg1, monitor_block_top); // points to current entry, starting
+ // with top-most entry
+ la(x9, monitor_block_bot); // points to word before bottom of
+ // monitor block
+
+ j(entry);
+
+ // Entry already locked, need to throw exception
+ bind(exception);
+
+ if (throw_monitor_exception) {
+ // Throw exception
+ MacroAssembler::call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::
+ throw_illegal_monitor_state_exception));
+
+ should_not_reach_here();
+ } else {
+ // Stack unrolling. Unlock object and install illegal_monitor_exception.
+ // Unlock does not block, so don't have to worry about the frame.
+ // We don't have to preserve c_rarg1 since we are going to throw an exception.
+
+ push(state);
+ unlock_object(c_rarg1);
+ pop(state);
+
+ if (install_monitor_exception) {
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::
+ new_illegal_monitor_state_exception));
+ }
+
+ j(restart);
+ }
+
+ bind(loop);
+ // check if current entry is used
+ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes());
+ ld(t0, Address(t0, 0));
+ bnez(t0, exception);
+
+ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry
+ bind(entry);
+ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry
+ }
+
+ bind(no_unlock);
+
+ // jvmti support
+ if (notify_jvmdi) {
+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA
+
+ } else {
+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
+ }
+
+ // remove activation
+ // get sender esp
+ ld(t1,
+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize));
+ if (StackReservedPages > 0) {
+ // testing if reserved zone needs to be re-enabled
+ Label no_reserved_zone_enabling;
+
+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
+ ble(t1, t0, no_reserved_zone_enabling);
+
+ call_VM_leaf(
+ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread);
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_delayed_StackOverflowError));
+ should_not_reach_here();
+
+ bind(no_reserved_zone_enabling);
+ }
+
+ // restore sender esp
+ mv(esp, t1);
+
+ // remove frame anchor
+ leave();
+ // If we're returning to interpreted code we will shortly be
+ // adjusting SP to allow some space for ESP. If we're returning to
+ // compiled code the saved sender SP was saved in sender_sp, so this
+ // restores it.
+ andi(sp, esp, -16);
+}
+
+// Lock object
+//
+// Args:
+// c_rarg1: BasicObjectLock to be used for locking
+//
+// Kills:
+// x10
+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
+// t0, t1 (temp regs)
+void InterpreterMacroAssembler::lock_object(Register lock_reg)
+{
+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
+ if (UseHeavyMonitors) {
+ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+ lock_reg);
+ } else {
+ Label done;
+
+ const Register swap_reg = x10;
+ const Register tmp = c_rarg2;
+ const Register obj_reg = c_rarg3; // Will contain the oop
+
+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
+ const int mark_offset = lock_offset +
+ BasicLock::displaced_header_offset_in_bytes();
+
+ Label slow_case;
+
+ // Load object pointer into obj_reg c_rarg3
+ ld(obj_reg, Address(lock_reg, obj_offset));
+
+ if (DiagnoseSyncOnValueBasedClasses != 0) {
+ load_klass(tmp, obj_reg);
+ lwu(tmp, Address(tmp, Klass::access_flags_offset()));
+ andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
+ bnez(tmp, slow_case);
+ }
+
+ // Load (object->mark() | 1) into swap_reg
+ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+ ori(swap_reg, t0, 1);
+
+ // Save (object->mark() | 1) into BasicLock's displaced header
+ sd(swap_reg, Address(lock_reg, mark_offset));
+
+ assert(lock_offset == 0,
+           "displaced header must be first word in BasicObjectLock");
+
+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL);
+
+ // Test if the oopMark is an obvious stack pointer, i.e.,
+ // 1) (mark & 7) == 0, and
+ // 2) sp <= mark < mark + os::pagesize()
+ //
+ // These 3 tests can be done by evaluating the following
+ // expression: ((mark - sp) & (7 - os::vm_page_size())),
+ // assuming both stack pointer and pagesize have their
+ // least significant 3 bits clear.
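+    // Illustrative example (values made up): if mark = sp + 0x40 and the page
+    // size is 4 KB, then (mark - sp) = 0x40 and (7 - 0x1000) = 0x...fffff007,
+    // so the AND yields 0 and the lock is treated as recursive.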
+ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg
+ sub(swap_reg, swap_reg, sp);
+ li(t0, (int64_t)(7 - os::vm_page_size()));
+ andr(swap_reg, swap_reg, t0);
+
+ // Save the test result, for recursive case, the result is zero
+ sd(swap_reg, Address(lock_reg, mark_offset));
+ beqz(swap_reg, done);
+
+ bind(slow_case);
+
+ // Call the runtime routine for slow case
+ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+ lock_reg);
+
+ bind(done);
+ }
+}
+
+
+// Unlocks an object. Used in monitorexit bytecode and
+// remove_activation. Throws an IllegalMonitorException if object is
+// not locked by current thread.
+//
+// Args:
+// c_rarg1: BasicObjectLock for lock
+//
+// Kills:
+// x10
+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs)
+// t0, t1 (temp regs)
+void InterpreterMacroAssembler::unlock_object(Register lock_reg)
+{
+  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
+
+ if (UseHeavyMonitors) {
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
+ } else {
+ Label done;
+
+ const Register swap_reg = x10;
+ const Register header_reg = c_rarg2; // Will contain the old oopMark
+ const Register obj_reg = c_rarg3; // Will contain the oop
+
+ save_bcp(); // Save in case of exception
+
+    // Convert from BasicObjectLock structure to object and BasicLock
+    // structure. Store the BasicLock address into x10.
+ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+
+ // Load oop into obj_reg(c_rarg3)
+ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+ // Free entry
+ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+ // Load the old header from BasicLock structure
+ ld(header_reg, Address(swap_reg,
+ BasicLock::displaced_header_offset_in_bytes()));
+
+ // Test for recursion
+ beqz(header_reg, done);
+
+ // Atomic swap back the old header
+ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL);
+
+ // Call the runtime routine for slow case.
+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
+
+ bind(done);
+
+ restore_bcp();
+ }
+}
+
+
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp,
+ Label& zero_continue) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
+ beqz(mdp, zero_continue);
+}
+
+// Set the method data pointer for the current bcp.
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ Label set_mdp;
+ push_reg(0xc00, sp); // save x10, x11
+
+ // Test MDO to avoid the call if it is NULL.
+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset())));
+ beqz(x10, set_mdp);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp);
+ // x10: mdi
+ // mdo is guaranteed to be non-zero here, we checked for it before the call.
+ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset())));
+ la(x11, Address(x11, in_bytes(MethodData::data_offset())));
+ add(x10, x11, x10);
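+  // x10 now holds the address of the data entry for the current bcp (the new mdp)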
+ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
+ bind(set_mdp);
+ pop_reg(0xc00, sp);
+}
+
+void InterpreterMacroAssembler::verify_method_data_pointer() {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+ Label verify_continue;
+ add(sp, sp, -4 * wordSize);
+ sd(x10, Address(sp, 0));
+ sd(x11, Address(sp, wordSize));
+ sd(x12, Address(sp, 2 * wordSize));
+ sd(x13, Address(sp, 3 * wordSize));
+ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue
+ get_method(x11);
+
+ // If the mdp is valid, it will point to a DataLayout header which is
+ // consistent with the bcp. The converse is highly probable also.
+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset())));
+ ld(t0, Address(x11, Method::const_offset()));
+ add(x12, x12, t0);
+ la(x12, Address(x12, ConstMethod::codes_offset()));
+ beq(x12, xbcp, verify_continue);
+  // x11: method
+ // xbcp: bcp // xbcp == 22
+ // x13: mdp
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp),
+ x11, xbcp, x13);
+ bind(verify_continue);
+ ld(x10, Address(sp, 0));
+ ld(x11, Address(sp, wordSize));
+ ld(x12, Address(sp, 2 * wordSize));
+ ld(x13, Address(sp, 3 * wordSize));
+ add(sp, sp, 4 * wordSize);
+#endif // ASSERT
+}
+
+
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
+ int constant,
+ Register value) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ Address data(mdp_in, constant);
+ sd(value, data);
+}
+
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+ int constant,
+ bool decrement) {
+ increment_mdp_data_at(mdp_in, noreg, constant, decrement);
+}
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+ Register reg,
+ int constant,
+ bool decrement) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+  // %%% this does 64-bit counters; at best it is wasting space,
+  // at worst it is a rare bug when counters overflow
+
+ assert_different_registers(t1, t0, mdp_in, reg);
+
+ Address addr1(mdp_in, constant);
+ Address addr2(t1, 0);
+ Address &addr = addr1;
+ if (reg != noreg) {
+ la(t1, addr1);
+ add(t1, t1, reg);
+ addr = addr2;
+ }
+
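+  // The update below saturates; it is equivalent to this illustrative C:
+  //   int64_t c = *addr + (decrement ? -1 : 1);
+  //   if (decrement ? c >= 0 : c > 0) { *addr = c; }  // skip the store on under-/overflow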
+ if (decrement) {
+ ld(t0, addr);
+ addi(t0, t0, -DataLayout::counter_increment);
+ Label L;
+ bltz(t0, L); // skip store if counter underflow
+ sd(t0, addr);
+ bind(L);
+ } else {
+ assert(DataLayout::counter_increment == 1,
+ "flow-free idiom only works with 1");
+ ld(t0, addr);
+ addi(t0, t0, DataLayout::counter_increment);
+ Label L;
+ blez(t0, L); // skip store if counter overflow
+ sd(t0, addr);
+ bind(L);
+ }
+}
+
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
+ int flag_byte_constant) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ int flags_offset = in_bytes(DataLayout::flags_offset());
+ // Set the flag
+ lbu(t1, Address(mdp_in, flags_offset));
+ ori(t1, t1, flag_byte_constant);
+ sb(t1, Address(mdp_in, flags_offset));
+}
+
+
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
+ int offset,
+ Register value,
+ Register test_value_out,
+ Label& not_equal_continue) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ if (test_value_out == noreg) {
+ ld(t1, Address(mdp_in, offset));
+ bne(value, t1, not_equal_continue);
+ } else {
+ // Put the test value into a register, so caller can use it:
+ ld(test_value_out, Address(mdp_in, offset));
+ bne(value, test_value_out, not_equal_continue);
+ }
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+ int offset_of_disp) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ ld(t1, Address(mdp_in, offset_of_disp));
+ add(mdp_in, mdp_in, t1);
+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+ Register reg,
+ int offset_of_disp) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ add(t1, mdp_in, reg);
+ ld(t1, Address(t1, offset_of_disp));
+ add(mdp_in, mdp_in, t1);
+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in,
+ int constant) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+ addi(mdp_in, mdp_in, (unsigned)constant);
+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
+ assert(ProfileInterpreter, "must be profiling interpreter");
+
+ // save/restore across call_VM
+ addi(sp, sp, -2 * wordSize);
+ sd(zr, Address(sp, 0));
+ sd(return_bci, Address(sp, wordSize));
+ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
+ return_bci);
+ ld(zr, Address(sp, 0));
+ ld(return_bci, Address(sp, wordSize));
+ addi(sp, sp, 2 * wordSize);
+}
+
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
+ Register bumped_count) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ // Otherwise, assign to mdp
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are taking a branch. Increment the taken count.
+ Address data(mdp, in_bytes(JumpData::taken_offset()));
+ ld(bumped_count, data);
+ assert(DataLayout::counter_increment == 1,
+ "flow-free idiom only works with 1");
+ addi(bumped_count, bumped_count, DataLayout::counter_increment);
+ Label L;
+    // e.g. bumped_count = 0x7fffffffffffffff: adding 1 overflows to a
+    // negative value, so we use <= 0 as the overflow test
+    blez(bumped_count, L); // skip store if counter overflowed
+ sd(bumped_count, data);
+ bind(L);
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+    // We are not taking this branch. Increment the not-taken count.
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
+
+ // The method data pointer needs to be updated to correspond to
+ // the next bytecode
+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are making a call. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // We are making a call. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_constant(mdp,
+ in_bytes(VirtualCallData::
+ virtual_call_data_size()));
+ bind(profile_continue);
+ }
+}
+
+
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
+ Register mdp,
+ Register reg2,
+ bool receiver_can_be_null) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ Label skip_receiver_profile;
+ if (receiver_can_be_null) {
+      Label not_null;
+      bnez(receiver, not_null);
+      // We are making a call. Increment the count for null receiver.
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+ j(skip_receiver_profile);
+ bind(not_null);
+ }
+
+ // Record the receiver type.
+ record_klass_in_profile(receiver, mdp, reg2, true);
+ bind(skip_receiver_profile);
+
+ // The method data pointer needs to be updated to reflect the new target.
+
+ update_mdp_by_constant(mdp,
+ in_bytes(VirtualCallData::
+ virtual_call_data_size()));
+ bind(profile_continue);
+ }
+}
+
+// This routine creates a state machine for updating the multi-row
+// type profile at a virtual call site (or other type-sensitive bytecode).
+// The machine visits each row (of receiver/count) until the receiver type
+// is found, or until it runs out of rows. At the same time, it remembers
+// the location of the first empty row. (An empty row records null for its
+// receiver, and can be allocated for a newly-observed receiver type.)
+// Because there are two degrees of freedom in the state, a simple linear
+// search will not work; it must be a decision tree. Hence this helper
+// function is recursive, to generate the required tree structured code.
+// It's the interpreter, so we are trading off code space for speed.
+// See below for example code.
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
+ Register receiver, Register mdp,
+ Register reg2,
+ Label& done, bool is_virtual_call) {
+ if (TypeProfileWidth == 0) {
+ if (is_virtual_call) {
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+ }
+
+ } else {
+ int non_profiled_offset = -1;
+ if (is_virtual_call) {
+ non_profiled_offset = in_bytes(CounterData::count_offset());
+ }
+
+ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
+ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
+ }
+}
+
+void InterpreterMacroAssembler::record_item_in_profile_helper(
+ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows,
+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) {
+ int last_row = total_rows - 1;
+ assert(start_row <= last_row, "must be work left to do");
+ // Test this row for both the item and for null.
+ // Take any of three different outcomes:
+ // 1. found item => increment count and goto done
+ // 2. found null => keep looking for case 1, maybe allocate this cell
+ // 3. found something else => keep looking for cases 1 and 2
+ // Case 3 is handled by a recursive call.
+ for (int row = start_row; row <= last_row; row++) {
+ Label next_test;
+ bool test_for_null_also = (row == start_row);
+
+ // See if the item is item[n].
+ int item_offset = in_bytes(item_offset_fn(row));
+ test_mdp_data_at(mdp, item_offset, item,
+ (test_for_null_also ? reg2 : noreg),
+ next_test);
+ // (Reg2 now contains the item from the CallData.)
+
+ // The item is item[n]. Increment count[n].
+ int count_offset = in_bytes(item_count_offset_fn(row));
+ increment_mdp_data_at(mdp, count_offset);
+ j(done);
+ bind(next_test);
+
+ if (test_for_null_also) {
+ Label found_null;
+ // Failed the equality check on item[n]... Test for null.
+ if (start_row == last_row) {
+ // The only thing left to do is handle the null case.
+ if (non_profiled_offset >= 0) {
+ beqz(reg2, found_null);
+ // Item did not match any saved item and there is no empty row for it.
+ // Increment total counter to indicate polymorphic case.
+ increment_mdp_data_at(mdp, non_profiled_offset);
+ j(done);
+ bind(found_null);
+ } else {
+ bnez(reg2, done);
+ }
+ break;
+ }
+ // Since null is rare, make it be the branch-taken case.
+ beqz(reg2, found_null);
+
+ // Put all the "Case 3" tests here.
+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
+ item_offset_fn, item_count_offset_fn, non_profiled_offset);
+
+ // Found a null. Keep searching for a matching item,
+ // but remember that this is an empty (unused) slot.
+ bind(found_null);
+ }
+ }
+
+ // In the fall-through case, we found no matching item, but we
+  // observed that item[start_row] is NULL.
+ // Fill in the item field and increment the count.
+ int item_offset = in_bytes(item_offset_fn(start_row));
+ set_mdp_data_at(mdp, item_offset, item);
+ int count_offset = in_bytes(item_count_offset_fn(start_row));
+ mv(reg2, DataLayout::counter_increment);
+ set_mdp_data_at(mdp, count_offset, reg2);
+ if (start_row > 0) {
+ j(done);
+ }
+}
+
+// Example state machine code for three profile rows:
+// # main copy of decision tree, rooted at row[1]
+// if (row[0].rec == rec) then [
+// row[0].incr()
+// goto done
+// ]
+// if (row[0].rec != NULL) then [
+// # inner copy of decision tree, rooted at row[1]
+// if (row[1].rec == rec) then [
+// row[1].incr()
+// goto done
+// ]
+// if (row[1].rec != NULL) then [
+// # degenerate decision tree, rooted at row[2]
+// if (row[2].rec == rec) then [
+// row[2].incr()
+// goto done
+// ]
+// if (row[2].rec != NULL) then [
+// count.incr()
+// goto done
+// ] # overflow
+// row[2].init(rec)
+// goto done
+// ] else [
+// # remember row[1] is empty
+// if (row[2].rec == rec) then [
+// row[2].incr()
+// goto done
+// ]
+// row[1].init(rec)
+// goto done
+// ]
+//   ] else [
+// # remember row[0] is empty
+// if (row[1].rec == rec) then [
+// row[1].incr()
+// goto done
+// ]
+// if (row[2].rec == rec) then [
+// row[2].incr()
+// goto done
+// ]
+// row[0].init(rec)
+// goto done
+// ]
+// done:
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
+ Register mdp, Register reg2,
+ bool is_virtual_call) {
+ assert(ProfileInterpreter, "must be profiling");
+ Label done;
+
+ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call);
+
+ bind(done);
+}
+
+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Update the total ret count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+ for (uint row = 0; row < RetData::row_limit(); row++) {
+ Label next_test;
+
+ // See if return_bci is equal to bci[n]:
+ test_mdp_data_at(mdp,
+ in_bytes(RetData::bci_offset(row)),
+ return_bci, noreg,
+ next_test);
+
+ // return_bci is equal to bci[n]. Increment the count.
+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));
+
+ // The method data pointer needs to be updated to reflect the new target.
+ update_mdp_by_offset(mdp,
+ in_bytes(RetData::bci_displacement_offset(row)));
+ j(profile_continue);
+ bind(next_test);
+ }
+
+ update_mdp_for_ret(return_bci);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
+ // The method data pointer needs to be updated.
+ int mdp_delta = in_bytes(BitData::bit_data_size());
+ if (TypeProfileCasts) {
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+ }
+ update_mdp_by_constant(mdp, mdp_delta);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
+ if (ProfileInterpreter && TypeProfileCasts) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ int count_offset = in_bytes(CounterData::count_offset());
+ // Back up the address, since we have already bumped the mdp.
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+
+ // *Decrement* the counter. We expect to see zero or small negatives.
+ increment_mdp_data_at(mdp, count_offset, true);
+
+ bind (profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // The method data pointer needs to be updated.
+ int mdp_delta = in_bytes(BitData::bit_data_size());
+ if (TypeProfileCasts) {
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+
+ // Record the object type.
+ record_klass_in_profile(klass, mdp, reg2, false);
+ }
+ update_mdp_by_constant(mdp, mdp_delta);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Update the default case count
+ increment_mdp_data_at(mdp,
+ in_bytes(MultiBranchData::default_count_offset()));
+
+ // The method data pointer needs to be updated.
+ update_mdp_by_offset(mdp,
+ in_bytes(MultiBranchData::
+ default_displacement_offset()));
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_switch_case(Register index,
+ Register mdp,
+ Register reg2) {
+ if (ProfileInterpreter) {
+ Label profile_continue;
+
+ // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Build the base (index * per_case_size_in_bytes()) +
+ // case_array_offset_in_bytes()
+ mvw(reg2, in_bytes(MultiBranchData::per_case_size()));
+ mvw(t0, in_bytes(MultiBranchData::case_array_offset()));
+ Assembler::mul(index, index, reg2);
+ Assembler::add(index, index, t0);
+
+ // Update the case count
+ increment_mdp_data_at(mdp,
+ index,
+ in_bytes(MultiBranchData::relative_count_offset()));
+
+    // The method data pointer needs to be updated.
+ update_mdp_by_offset(mdp,
+ index,
+ in_bytes(MultiBranchData::
+ relative_displacement_offset()));
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; }
+
+void InterpreterMacroAssembler::notify_method_entry() {
+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
+ // track stack depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
+ if (JvmtiExport::can_post_interpreter_events()) {
+ Label L;
+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset()));
+ beqz(x13, L);
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::post_method_entry));
+ bind(L);
+ }
+
+ {
+ SkipIfEqual skip(this, &DTraceMethodProbes, false);
+ get_method(c_rarg1);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+ xthread, c_rarg1);
+ }
+
+ // RedefineClasses() tracing support for obsolete method entry
+ if (log_is_enabled(Trace, redefine, class, obsolete)) {
+ get_method(c_rarg1);
+ call_VM_leaf(
+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+ xthread, c_rarg1);
+ }
+}
+
+
+void InterpreterMacroAssembler::notify_method_exit(
+ TosState state, NotifyMethodExitMode mode) {
+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
+ // track stack depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+ Label L;
+ // Note: frame::interpreter_frame_result has a dependency on how the
+ // method result is saved across the call to post_method_exit. If this
+ // is changed then the interpreter_frame_result implementation will
+ // need to be updated too.
+
+ // template interpreter will leave the result on the top of the stack.
+ push(state);
+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset()));
+ beqz(x13, L);
+ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+ bind(L);
+ pop(state);
+ }
+
+ {
+ SkipIfEqual skip(this, &DTraceMethodProbes, false);
+ push(state);
+ get_method(c_rarg1);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+ xthread, c_rarg1);
+ pop(state);
+ }
+}
+
+
+// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
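+// Here "satisfies the condition" means the masked bits of the updated counter
+// are zero. Illustrative example (values made up): with increment = 8 and
+// mask = 0x3f8 the jump to 'where' is taken every 128th update, when bits 3..9
+// of the counter wrap around to zero.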
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
+ int increment, Address mask,
+ Register tmp1, Register tmp2,
+ bool preloaded, Label* where) {
+ Label done;
+ if (!preloaded) {
+ lwu(tmp1, counter_addr);
+ }
+ add(tmp1, tmp1, increment);
+ sw(tmp1, counter_addr);
+ lwu(tmp2, mask);
+ andr(tmp1, tmp1, tmp2);
+ bnez(tmp1, done);
+ j(*where); // offset is too large so we have to use j instead of beqz here
+ bind(done);
+}
+
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
+ int number_of_arguments) {
+ // interpreter specific
+ //
+  // Note: No need to save/restore the xbcp & xlocals pointers since these
+  //       are callee-saved registers and no blocking/GC can happen
+  //       in leaf calls.
+#ifdef ASSERT
+ {
+ Label L;
+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ beqz(t0, L);
+ stop("InterpreterMacroAssembler::call_VM_leaf_base:"
+ " last_sp != NULL");
+ bind(L);
+ }
+#endif /* ASSERT */
+ // super call
+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void InterpreterMacroAssembler::call_VM_base(Register oop_result,
+ Register java_thread,
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments,
+ bool check_exceptions) {
+ // interpreter specific
+ //
+ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't
+ // really make a difference for these runtime calls, since they are
+ // slow anyway. Btw., bcp must be saved/restored since it may change
+ // due to GC.
+ save_bcp();
+#ifdef ASSERT
+ {
+ Label L;
+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ beqz(t0, L);
+ stop("InterpreterMacroAssembler::call_VM_base:"
+ " last_sp != NULL");
+ bind(L);
+ }
+#endif /* ASSERT */
+ // super call
+ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp,
+ entry_point, number_of_arguments,
+ check_exceptions);
+  // interpreter specific
+ restore_bcp();
+ restore_locals();
+}
+
+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
+ assert_different_registers(obj, tmp, t0, mdo_addr.base());
+ Label update, next, none;
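+  // The profile cell at mdo_addr holds a Klass* with status flags
+  // (TypeEntries::null_seen / type_unknown) folded into its low bits; the
+  // xor/and sequences below compare the klass bits while ignoring the flags.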
+
+ verify_oop(obj);
+
+ bnez(obj, update);
+ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp);
+ j(next);
+
+ bind(update);
+ load_klass(obj, obj);
+
+ ld(t0, mdo_addr);
+ xorr(obj, obj, t0);
+ andi(t0, obj, TypeEntries::type_klass_mask);
+ beqz(t0, next); // klass seen before, nothing to
+ // do. The unknown bit may have been
+ // set already but no need to check.
+
+ andi(t0, obj, TypeEntries::type_unknown);
+ bnez(t0, next);
+ // already unknown. Nothing to do anymore.
+
+ ld(t0, mdo_addr);
+ beqz(t0, none);
+ li(tmp, (u1)TypeEntries::null_seen);
+ beq(t0, tmp, none);
+ // There is a chance that the checks above (re-reading profiling
+ // data from memory) fail if another thread has just set the
+ // profiling to this obj's klass
+ ld(t0, mdo_addr);
+ xorr(obj, obj, t0);
+ andi(t0, obj, TypeEntries::type_klass_mask);
+ beqz(t0, next);
+
+ // different than before. Cannot keep accurate profile.
+ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp);
+ j(next);
+
+ bind(none);
+ // first time here. Set profile type.
+ sd(obj, mdo_addr);
+
+ bind(next);
+}
+
+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) {
+ if (!ProfileInterpreter) {
+ return;
+ }
+
+ if (MethodData::profile_arguments() || MethodData::profile_return()) {
+ Label profile_continue;
+
+ test_method_data_pointer(mdp, profile_continue);
+
+ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+
+ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start));
+ if (is_virtual) {
+ li(tmp, (u1)DataLayout::virtual_call_type_data_tag);
+ bne(t0, tmp, profile_continue);
+ } else {
+ li(tmp, (u1)DataLayout::call_type_data_tag);
+ bne(t0, tmp, profile_continue);
+ }
+
+ // calculate slot step
+ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0));
+ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0;
+
+ // calculate type step
+ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0));
+ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0;
+
+ if (MethodData::profile_arguments()) {
+ Label done, loop, loopEnd, profileArgument, profileReturnType;
+ RegSet pushed_registers;
+ pushed_registers += x15;
+ pushed_registers += x16;
+ pushed_registers += x17;
+ Register mdo_addr = x15;
+ Register index = x16;
+ Register off_to_args = x17;
+ push_reg(pushed_registers, sp);
+
+ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset()));
+ mv(t0, TypeProfileArgsLimit);
+ beqz(t0, loopEnd);
+
+ mv(index, zr); // index < TypeProfileArgsLimit
+ bind(loop);
+ bgtz(index, profileReturnType);
+ li(t0, (int)MethodData::profile_return());
+ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false
+ bind(profileReturnType);
+ // If return value type is profiled we may have no argument to profile
+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
+ mv(t1, - TypeStackSlotEntries::per_arg_count());
+ mul(t1, index, t1);
+ add(tmp, tmp, t1);
+ li(t1, TypeStackSlotEntries::per_arg_count());
+ add(t0, mdp, off_to_args);
+ blt(tmp, t1, done);
+
+ bind(profileArgument);
+
+ ld(tmp, Address(callee, Method::const_offset()));
+ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset()));
+ // stack offset o (zero based) from the start of the argument
+ // list, for n arguments translates into offset n - o - 1 from
+ // the end of the argument list
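+      // e.g. (illustrative) with 3 parameters, the recorded stack slot o = 0
+      // maps to offset 3 - 0 - 1 = 2 from the end of the argument list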
+ li(t0, stack_slot_offset0);
+ li(t1, slot_step);
+ mul(t1, index, t1);
+ add(t0, t0, t1);
+ add(t0, mdp, t0);
+ ld(t0, Address(t0));
+ sub(tmp, tmp, t0);
+ addi(tmp, tmp, -1);
+ Address arg_addr = argument_address(tmp);
+ ld(tmp, arg_addr);
+
+ li(t0, argument_type_offset0);
+ li(t1, type_step);
+ mul(t1, index, t1);
+ add(t0, t0, t1);
+ add(mdo_addr, mdp, t0);
+ Address mdo_arg_addr(mdo_addr, 0);
+ profile_obj_type(tmp, mdo_arg_addr, t1);
+
+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+ addi(off_to_args, off_to_args, to_add);
+
+ // increment index by 1
+ addi(index, index, 1);
+ li(t1, TypeProfileArgsLimit);
+ blt(index, t1, loop);
+ bind(loopEnd);
+
+ if (MethodData::profile_return()) {
+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
+ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
+ }
+
+ add(t0, mdp, off_to_args);
+ bind(done);
+ mv(mdp, t0);
+
+ // unspill the clobbered registers
+ pop_reg(pushed_registers, sp);
+
+ if (MethodData::profile_return()) {
+ // We're right after the type profile for the last
+ // argument. tmp is the number of cells left in the
+      // CallTypeData/VirtualCallTypeData to reach its end; non-zero
+      // if there's a return to profile.
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size));
+ }
+ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
+ } else {
+ assert(MethodData::profile_return(), "either profile call args or call ret");
+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
+ }
+
+ // mdp points right after the end of the
+ // CallTypeData/VirtualCallTypeData, right after the cells for the
+ // return value type if there's one
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
+ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1);
+ if (ProfileInterpreter && MethodData::profile_return()) {
+ Label profile_continue, done;
+
+ test_method_data_pointer(mdp, profile_continue);
+
+ if (MethodData::profile_return_jsr292_only()) {
+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
+
+ // If we don't profile all invoke bytecodes we must make sure
+ // it's a bytecode we indeed profile. We can't go back to the
+      // beginning of the ProfileData we intend to update to check its
+      // type because we're right after it and we don't know its
+      // length.
+ Label do_profile;
+ lbu(t0, Address(xbcp, 0));
+ li(tmp, (u1)Bytecodes::_invokedynamic);
+ beq(t0, tmp, do_profile);
+ li(tmp, (u1)Bytecodes::_invokehandle);
+ beq(t0, tmp, do_profile);
+ get_method(tmp);
+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes()));
+      li(t1, static_cast<int>(vmIntrinsics::_compiledLambdaForm));
+ bne(t0, t1, profile_continue);
+ bind(do_profile);
+ }
+
+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
+ mv(tmp, ret);
+ profile_obj_type(tmp, mdo_ret_addr, t1);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) {
+ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3);
+ if (ProfileInterpreter && MethodData::profile_parameters()) {
+ Label profile_continue, done;
+
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Load the offset of the area within the MDO used for
+ // parameters. If it's negative we're not profiling any parameters
+ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())));
+ srli(tmp2, tmp1, 31);
+ bnez(tmp2, profile_continue); // i.e. sign bit set
+
+ // Compute a pointer to the area for parameters from the offset
+ // and move the pointer to the slot for the last
+ // parameters. Collect profiling from last parameter down.
+ // mdo start + parameters offset + array length - 1
+ add(mdp, mdp, tmp1);
+ ld(tmp1, Address(mdp, ArrayData::array_len_offset()));
+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count());
+
+ Label loop;
+ bind(loop);
+
+ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
+ int type_base = in_bytes(ParametersTypeData::type_offset(0));
+ int per_arg_scale = exact_log2(DataLayout::cell_size);
+ add(t0, mdp, off_base);
+ add(t1, mdp, type_base);
+
+ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale);
+ // load offset on the stack from the slot for this parameter
+ ld(tmp2, Address(tmp2, 0));
+ neg(tmp2, tmp2);
+
+ // read the parameter from the local area
+ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize);
+ ld(tmp2, Address(tmp2, 0));
+
+ // profile the parameter
+ shadd(t1, tmp1, t1, t0, per_arg_scale);
+ Address arg_type(t1, 0);
+ profile_obj_type(tmp2, arg_type, tmp3);
+
+ // go to next parameter
+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count());
+ bgez(tmp1, loop);
+
+ bind(profile_continue);
+ }
+}
+
+void InterpreterMacroAssembler::get_method_counters(Register method,
+ Register mcs, Label& skip) {
+ Label has_counters;
+ ld(mcs, Address(method, Method::method_counters_offset()));
+ bnez(mcs, has_counters);
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::build_method_counters), method);
+ ld(mcs, Address(method, Method::method_counters_offset()));
+ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory
+ bind(has_counters);
+}
+
+#ifdef ASSERT
+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits,
+ const char* msg, bool stop_by_hit) {
+ Label L;
+ andi(t0, access_flags, flag_bits);
+ if (stop_by_hit) {
+ beqz(t0, L);
+ } else {
+ bnez(t0, L);
+ }
+ stop(msg);
+ bind(L);
+}
+
+void InterpreterMacroAssembler::verify_frame_setup() {
+ Label L;
+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+ ld(t0, monitor_block_top);
+ beq(esp, t0, L);
+ stop("broken stack frame setup in interpreter");
+ bind(L);
+}
+#endif
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4d8cb086f829f1a661de480a15f0701a6bbba6e4
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP
+#define CPU_RISCV_INTERP_MASM_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
+
+// This file specializes the assembler with interpreter-specific macros
+
+typedef ByteSize (*OffsetFunction)(uint);
+
+class InterpreterMacroAssembler: public MacroAssembler {
+ protected:
+ // Interpreter specific version of call_VM_base
+ using MacroAssembler::call_VM_leaf_base;
+
+ virtual void call_VM_leaf_base(address entry_point,
+ int number_of_arguments);
+
+ virtual void call_VM_base(Register oop_result,
+ Register java_thread,
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments,
+ bool check_exceptions);
+
+ // base routine for all dispatches
+ void dispatch_base(TosState state, address* table, bool verifyoop = true,
+ bool generate_poll = false, Register Rs = t0);
+
+ public:
+ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
+ virtual ~InterpreterMacroAssembler() {}
+
+ void load_earlyret_value(TosState state);
+
+ void jump_to_entry(address entry);
+
+ virtual void check_and_handle_popframe(Register java_thread);
+ virtual void check_and_handle_earlyret(Register java_thread);
+
+ // Interpreter-specific registers
+ void save_bcp() {
+ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize));
+ }
+
+ void restore_bcp() {
+ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize));
+ }
+
+ void restore_locals() {
+ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize));
+ }
+
+ void restore_constant_pool_cache() {
+ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize));
+ }
+
+ void get_dispatch();
+
+ // Helpers for runtime call arguments/results
+ void get_method(Register reg) {
+ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize));
+ }
+
+ void get_const(Register reg) {
+ get_method(reg);
+ ld(reg, Address(reg, in_bytes(Method::const_offset())));
+ }
+
+ void get_constant_pool(Register reg) {
+ get_const(reg);
+ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset())));
+ }
+
+ void get_constant_pool_cache(Register reg) {
+ get_constant_pool(reg);
+ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes()));
+ }
+
+ void get_cpool_and_tags(Register cpool, Register tags) {
+ get_constant_pool(cpool);
+ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes()));
+ }
+
+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
+ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_method_counters(Register method, Register mcs, Label& skip);
+
+ // Load cpool->resolved_references(index).
+ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15);
+
+ // Load cpool->resolved_klass_at(index).
+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp);
+
+ void load_resolved_method_at_index(int byte_no, Register method, Register cache);
+
+ void pop_ptr(Register r = x10);
+ void pop_i(Register r = x10);
+ void pop_l(Register r = x10);
+ void pop_f(FloatRegister r = f10);
+ void pop_d(FloatRegister r = f10);
+ void push_ptr(Register r = x10);
+ void push_i(Register r = x10);
+ void push_l(Register r = x10);
+ void push_f(FloatRegister r = f10);
+ void push_d(FloatRegister r = f10);
+
+ void pop(TosState state); // transition vtos -> state
+ void push(TosState state); // transition state -> vtos
+
+ void empty_expression_stack() {
+ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ // NULL last_sp until next java call
+ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ }
+
+ // Helpers for swap and dup
+ void load_ptr(int n, Register val);
+ void store_ptr(int n, Register val);
+
+  // Load float value from 'address'. The value is loaded onto the FPU register f10.
+ void load_float(Address src);
+ void load_double(Address src);
+
+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is
+ // a subtype of super_klass.
+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
+
+ // Dispatching
+ void dispatch_prolog(TosState state, int step = 0);
+ void dispatch_epilog(TosState state, int step = 0);
+ // dispatch via t0
+ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0);
+ // dispatch normal table via t0 (assume t0 is loaded already)
+ void dispatch_only_normal(TosState state, Register Rs = t0);
+ void dispatch_only_noverify(TosState state, Register Rs = t0);
+ // load t0 from [xbcp + step] and dispatch via t0
+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
+ // load t0 from [xbcp] and dispatch via t0 and table
+ void dispatch_via (TosState state, address* table);
+
+ // jump to an invoked target
+ void prepare_to_jump_from_interpreted();
+ void jump_from_interpreted(Register method);
+
+
+ // Returning from interpreted functions
+ //
+ // Removes the current activation (incl. unlocking of monitors)
+ // and sets up the return address. This code is also used for
+  // exception unwinding. In that case, we do not want to throw
+ // IllegalMonitorStateExceptions, since that might get us into an
+ // infinite rethrow exception loop.
+ // Additionally this code is used for popFrame and earlyReturn.
+ // In popFrame case we want to skip throwing an exception,
+ // installing an exception, and notifying jvmdi.
+ // In earlyReturn case we only want to skip throwing an exception
+ // and installing an exception.
+ void remove_activation(TosState state,
+ bool throw_monitor_exception = true,
+ bool install_monitor_exception = true,
+ bool notify_jvmdi = true);
+
+ // FIXME: Give us a valid frame at a null check.
+ virtual void null_check(Register reg, int offset = -1) {
+ MacroAssembler::null_check(reg, offset);
+ }
+
+ // Object locking
+ void lock_object (Register lock_reg);
+ void unlock_object(Register lock_reg);
+
+ // Interpreter profiling operations
+ void set_method_data_pointer_for_bcp();
+ void test_method_data_pointer(Register mdp, Label& zero_continue);
+ void verify_method_data_pointer();
+
+ void set_mdp_data_at(Register mdp_in, int constant, Register value);
+ void increment_mdp_data_at(Address data, bool decrement = false);
+ void increment_mdp_data_at(Register mdp_in, int constant,
+ bool decrement = false);
+ void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
+ bool decrement = false);
+ void increment_mask_and_jump(Address counter_addr,
+ int increment, Address mask,
+ Register tmp1, Register tmp2,
+ bool preloaded, Label* where);
+
+ void set_mdp_flag_at(Register mdp_in, int flag_constant);
+ void test_mdp_data_at(Register mdp_in, int offset, Register value,
+ Register test_value_out,
+ Label& not_equal_continue);
+
+ void record_klass_in_profile(Register receiver, Register mdp,
+ Register reg2, bool is_virtual_call);
+ void record_klass_in_profile_helper(Register receiver, Register mdp,
+ Register reg2,
+ Label& done, bool is_virtual_call);
+ void record_item_in_profile_helper(Register item, Register mdp,
+ Register reg2, int start_row, Label& done, int total_rows,
+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+ int non_profiled_offset);
+
+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
+ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
+ void update_mdp_by_constant(Register mdp_in, int constant);
+ void update_mdp_for_ret(Register return_bci);
+
+ // narrow int return value
+ void narrow(Register result);
+
+ void profile_taken_branch(Register mdp, Register bumped_count);
+ void profile_not_taken_branch(Register mdp);
+ void profile_call(Register mdp);
+ void profile_final_call(Register mdp);
+ void profile_virtual_call(Register receiver, Register mdp,
+ Register t1,
+ bool receiver_can_be_null = false);
+ void profile_ret(Register return_bci, Register mdp);
+ void profile_null_seen(Register mdp);
+ void profile_typecheck(Register mdp, Register klass, Register temp);
+ void profile_typecheck_failed(Register mdp);
+ void profile_switch_default(Register mdp);
+ void profile_switch_case(Register index_in_scratch, Register mdp,
+ Register temp);
+
+ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp);
+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual);
+ void profile_return_type(Register mdp, Register ret, Register tmp);
+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3);
+
+ // Debugging
+ // only if +VerifyFPU && (state == ftos || state == dtos)
+ void verify_FPU(int stack_depth, TosState state = ftos);
+
+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+
+ // support for jvmti/dtrace
+ void notify_method_entry();
+ void notify_method_exit(TosState state, NotifyMethodExitMode mode);
+
+ virtual void _call_Unimplemented(address call_site) {
+ save_bcp();
+ set_last_Java_frame(esp, fp, (address) pc(), t0);
+ MacroAssembler::_call_Unimplemented(call_site);
+ }
+
+#ifdef ASSERT
+ void verify_access_flags(Register access_flags, uint32_t flag_bits,
+ const char* msg, bool stop_by_hit = true);
+ void verify_frame_setup();
+#endif
+};
+
+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d93530d85640145aaa8ac76e1e15d95cafc22bb8
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/universe.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/signature.hpp"
+
+#define __ _masm->
+
+// Implementation of SignatureHandlerGenerator
+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; }
+Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; }
+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; }
+
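+// c_rarg0 is reserved for the JNIEnv*, so integer arguments start at
+// g_INTArgReg[1]; for static methods the constructor pre-reserves one more
+// slot for the mirror. Hence the pre-increment and the "- 1" bound below.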
+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() {
+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) {
+ return g_INTArgReg[++_num_reg_int_args];
+ }
+ return noreg;
+}
+
+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() {
+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) {
+ return g_FPArgReg[_num_reg_fp_args++];
+ } else {
+ return fnoreg;
+ }
+}
+
+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() {
+ int ret = _stack_offset;
+ _stack_offset += wordSize;
+ return ret;
+}
+
+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(
+ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
+  _masm = new MacroAssembler(buffer); // allocate on resource area by default
+ _num_reg_int_args = (method->is_static() ? 1 : 0);
+ _num_reg_fp_args = 0;
+ _stack_offset = 0;
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset()));
+
+ Register reg = next_gpr();
+ if (reg != noreg) {
+ __ lw(reg, src);
+ } else {
+ __ lw(x10, src);
+ __ sw(x10, Address(to(), next_stack_offset()));
+ }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1));
+
+ Register reg = next_gpr();
+ if (reg != noreg) {
+ __ ld(reg, src);
+ } else {
+ __ ld(x10, src);
+ __ sd(x10, Address(to(), next_stack_offset()));
+ }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset()));
+
+ FloatRegister reg = next_fpr();
+ if (reg != fnoreg) {
+ __ flw(reg, src);
+ } else {
+    // a floating-point argument is passed according to the integer calling
+    // convention if no floating-point argument register is available
+ pass_int();
+ }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1));
+
+ FloatRegister reg = next_fpr();
+ if (reg != fnoreg) {
+ __ fld(reg, src);
+ } else {
+    // a floating-point argument is passed according to the integer calling
+    // convention if no floating-point argument register is available
+ pass_long();
+ }
+}
+
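+// An object argument is passed as a handle: the address of the local slot
+// holding the oop when the oop is non-NULL, and NULL otherwise. The null
+// checks below materialize exactly that.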
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
+ Register reg = next_gpr();
+ if (reg == c_rarg1) {
+ assert(offset() == 0, "argument register 1 can only be (non-null) receiver");
+ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset()));
+ } else if (reg != noreg) {
+ // c_rarg2-c_rarg7
+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset()));
+    __ mv(reg, zr); // _num_reg_int_args -> c_rarg: 1:c_rarg2, 2:c_rarg3, ...
+ __ ld(temp(), x10);
+ Label L;
+ __ beqz(temp(), L);
+ __ mv(reg, x10);
+ __ bind(L);
+ } else {
+    // to stack
+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset()));
+ __ ld(temp(), x10);
+ Label L;
+ __ bnez(temp(), L);
+ __ mv(x10, zr);
+ __ bind(L);
+ assert(sizeof(jobject) == wordSize, "");
+ __ sd(x10, Address(to(), next_stack_offset()));
+ }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
+ // generate code to handle arguments
+ iterate(fingerprint);
+
+ // return result handler
+ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type())));
+ __ ret();
+
+ __ flush();
+}
+
+
+// Implementation of SignatureHandlerLibrary
+
+void SignatureHandlerLibrary::pd_set_handler(address handler) {}
+
+
+class SlowSignatureHandler
+ : public NativeSignatureIterator {
+ private:
+ address _from;
+ intptr_t* _to;
+ intptr_t* _int_args;
+ intptr_t* _fp_args;
+ intptr_t* _fp_identifiers;
+ unsigned int _num_reg_int_args;
+ unsigned int _num_reg_fp_args;
+
+ intptr_t* single_slot_addr() {
+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0));
+ _from -= Interpreter::stackElementSize;
+ return from_addr;
+ }
+
+ intptr_t* double_slot_addr() {
+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1));
+ _from -= 2 * Interpreter::stackElementSize;
+ return from_addr;
+ }
+
+ int pass_gpr(intptr_t value) {
+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) {
+ *_int_args++ = value;
+ return _num_reg_int_args++;
+ }
+ return -1;
+ }
+
+ int pass_fpr(intptr_t value) {
+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) {
+ *_fp_args++ = value;
+ return _num_reg_fp_args++;
+ }
+ return -1;
+ }
+
+ void pass_stack(intptr_t value) {
+ *_to++ = value;
+ }
+
+ virtual void pass_int() {
+ jint value = *(jint*)single_slot_addr();
+ if (pass_gpr(value) < 0) {
+ pass_stack(value);
+ }
+ }
+
+ virtual void pass_long() {
+ intptr_t value = *double_slot_addr();
+ if (pass_gpr(value) < 0) {
+ pass_stack(value);
+ }
+ }
+
+ virtual void pass_object() {
+ intptr_t* addr = single_slot_addr();
+ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr;
+ if (pass_gpr(value) < 0) {
+ pass_stack(value);
+ }
+ }
+
+ virtual void pass_float() {
+ jint value = *(jint*) single_slot_addr();
+    // a floating-point argument is passed according to the integer calling
+    // convention if no floating-point argument register is available
+ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) {
+ pass_stack(value);
+ }
+ }
+
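+  // Doubles that land in FP registers are flagged in the _fp_identifiers bit
+  // mask so the native entry, which assumes this layout, can distinguish
+  // double slots from float slots.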
+ virtual void pass_double() {
+ intptr_t value = *double_slot_addr();
+ int arg = pass_fpr(value);
+ if (0 <= arg) {
+ *_fp_identifiers |= (1ull << arg); // mark as double
+ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack
+ pass_stack(value);
+ }
+ }
+
+ public:
+ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to)
+ : NativeSignatureIterator(method)
+ {
+ _from = from;
+ _to = to;
+
+ _int_args = to - (method->is_static() ? 16 : 17);
+ _fp_args = to - 8;
+ _fp_identifiers = to - 9;
+ *(int*) _fp_identifiers = 0;
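+    // Assumed layout, shared with the interpreter's native entry: outgoing
+    // stack arguments are written upwards from 'to', while the integer
+    // register values, FP register values and the FP-identifier word live at
+    // the fixed negative offsets from 'to' computed above.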
+ _num_reg_int_args = (method->is_static() ? 1 : 0);
+ _num_reg_fp_args = 0;
+ }
+
+ ~SlowSignatureHandler()
+ {
+ _from = NULL;
+ _to = NULL;
+ _int_args = NULL;
+ _fp_args = NULL;
+ _fp_identifiers = NULL;
+ }
+};
+
+
+JRT_ENTRY(address,
+ InterpreterRuntime::slow_signature_handler(JavaThread* current,
+ Method* method,
+ intptr_t* from,
+ intptr_t* to))
+ methodHandle m(current, (Method*)method);
+ assert(m->is_native(), "sanity check");
+
+ // handle arguments
+ SlowSignatureHandler ssh(m, (address)from, to);
+ ssh.iterate(UCONST64(-1));
+
+ // return result handler
+ return Interpreter::result_handler(m->result_type());
+JRT_END
diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..05df63ba2ae5f1ef079019d4382f8fec774cd39c
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP
+#define CPU_RISCV_INTERPRETERRT_RISCV_HPP
+
+// This is included in the middle of class Interpreter.
+// Do not include files here.
+
+// native method calls
+
+class SignatureHandlerGenerator: public NativeSignatureIterator {
+ private:
+ MacroAssembler* _masm;
+ unsigned int _num_reg_fp_args;
+ unsigned int _num_reg_int_args;
+ int _stack_offset;
+
+ void pass_int();
+ void pass_long();
+ void pass_float();
+ void pass_double();
+ void pass_object();
+
+ Register next_gpr();
+ FloatRegister next_fpr();
+ int next_stack_offset();
+
+ public:
+ // Creation
+ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer);
+ virtual ~SignatureHandlerGenerator() {
+ _masm = NULL;
+ }
+
+ // Code generation
+ void generate(uint64_t fingerprint);
+
+ // Code generation support
+ static Register from();
+ static Register to();
+ static Register temp();
+};
+
+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9a6084afa1dee3832a7987af9f295bdc5042e970
--- /dev/null
+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
+
+private:
+
+ // FP value associated with _last_Java_sp:
+  intptr_t* volatile _last_Java_fp;  // the pointer is volatile, not what it points to
+
+public:
+ // Each arch must define reset, save, restore
+ // These are used by objects that only care about:
+ // 1 - initializing a new state (thread creation, javaCalls)
+ // 2 - saving a current state (javaCalls)
+ // 3 - restoring an old state (javaCalls)
+
+ void clear(void) {
+ // clearing _last_Java_sp must be first
+ _last_Java_sp = NULL;
+ OrderAccess::release();
+ _last_Java_fp = NULL;
+ _last_Java_pc = NULL;
+ }
+
+ void copy(JavaFrameAnchor* src) {
+    // To make sure the transition state is valid for "this",
+    // we must clear _last_Java_sp before copying the rest of the new data.
+    //
+    // Hack Alert: Temporary bugfix for 4717480/4721647
+    // To act like the previous version (pd_cache_state), don't null
+    // _last_Java_sp unless the value is changing.
+ //
+ assert(src != NULL, "Src should not be NULL.");
+ if (_last_Java_sp != src->_last_Java_sp) {
+ _last_Java_sp = NULL;
+ OrderAccess::release();
+ }
+ _last_Java_fp = src->_last_Java_fp;
+ _last_Java_pc = src->_last_Java_pc;
+ // Must be last so profiler will always see valid frame if has_last_frame() is true
+ _last_Java_sp = src->_last_Java_sp;
+ }
+
+ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; }
+ void make_walkable(JavaThread* thread);
+ void capture_last_Java_pc(void);
+
+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; }
+
+ const address last_Java_pc(void) { return _last_Java_pc; }
+
+private:
+
+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); }
+
+public:
+
+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); }
+
+ intptr_t* last_Java_fp(void) { return _last_Java_fp; }
+
+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
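
The ordering in clear() and copy() above treats _last_Java_sp as the validity flag that a
concurrent observer (for example a profiler walking the stack) keys off: retract it first,
mutate fp/pc, publish it last. The snippet below is a hypothetical stand-alone rendering of
that pattern with std::atomic, purely for illustration; the real code uses OrderAccess and
the actual JavaFrameAnchor fields.

  #include <atomic>
  #include <cstdint>

  // Sketch only; names and types are invented.
  struct AnchorSketch {
    std::atomic<intptr_t*> sp{nullptr};  // plays the role of _last_Java_sp
    intptr_t* fp = nullptr;
    void*     pc = nullptr;

    void set(intptr_t* new_sp, intptr_t* new_fp, void* new_pc) {
      sp.store(nullptr, std::memory_order_relaxed);         // retract first
      std::atomic_thread_fence(std::memory_order_seq_cst);  // full fence before mutating
      fp = new_fp;
      pc = new_pc;
      sp.store(new_sp, std::memory_order_release);          // publish last
    }

    // An observer that acquires a non-null sp is guaranteed to see the fp/pc
    // values written before that publish.
    bool walkable() const { return sp.load(std::memory_order_acquire) != nullptr; }
  };
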
diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..814ed23e471a26bb50100388fcbe08eb97f9aa5e
--- /dev/null
+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "runtime/safepoint.hpp"
+
+#define __ masm->
+
+#define BUFFER_SIZE 30*wordSize
+
+// Instead of issuing a LoadLoad barrier we create an address
+// dependency between loads; this might be more efficient.
+
+// Common register usage:
+// x10/f10: result
+// c_rarg0: jni env
+// c_rarg1: obj
+// c_rarg2: jfield id
+
+static const Register robj = x13;
+static const Register rcounter = x14;
+static const Register roffset = x15;
+static const Register rcounter_addr = x16;
+static const Register result = x17;
+
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
+ const char *name;
+ switch (type) {
+ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
+ case T_BYTE: name = "jni_fast_GetByteField"; break;
+ case T_CHAR: name = "jni_fast_GetCharField"; break;
+ case T_SHORT: name = "jni_fast_GetShortField"; break;
+ case T_INT: name = "jni_fast_GetIntField"; break;
+ case T_LONG: name = "jni_fast_GetLongField"; break;
+ case T_FLOAT: name = "jni_fast_GetFloatField"; break;
+ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break;
+ default: ShouldNotReachHere();
+ name = NULL; // unreachable
+ }
+ ResourceMark rm;
+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE);
+ CodeBuffer cbuf(blob);
+ MacroAssembler* masm = new MacroAssembler(&cbuf);
+ address fast_entry = __ pc();
+
+ Label slow;
+ int32_t offset = 0;
+ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset);
+ __ addi(rcounter_addr, rcounter_addr, offset);
+
+ Address safepoint_counter_addr(rcounter_addr, 0);
+ __ lwu(rcounter, safepoint_counter_addr);
+ // An even value means there are no ongoing safepoint operations
+ __ andi(t0, rcounter, 1);
+ __ bnez(t0, slow);
+
+ if (JvmtiExport::can_post_field_access()) {
+ // Using barrier to order wrt. JVMTI check and load of result.
+ __ membar(MacroAssembler::LoadLoad);
+
+ // Check to see if a field access watch has been set before we
+ // take the fast path.
+ int32_t offset2;
+ __ la_patchable(result,
+ ExternalAddress((address) JvmtiExport::get_field_access_count_addr()),
+ offset2);
+ __ lwu(result, Address(result, offset2));
+ __ bnez(result, slow);
+
+ __ mv(robj, c_rarg1);
+ } else {
+ // Using address dependency to order wrt. load of result.
+ __ xorr(robj, c_rarg1, rcounter);
+ __ xorr(robj, robj, rcounter); // obj, since
+ // robj ^ rcounter ^ rcounter == robj
+ // robj is address dependent on rcounter.
+ }
+
+ // Both robj and t0 are clobbered by try_resolve_jobject_in_native.
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ assert_cond(bs != NULL);
+ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow);
+
+ __ srli(roffset, c_rarg2, 2); // offset
+
+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
+ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler
+ __ add(roffset, robj, roffset);
+
+ switch (type) {
+ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break;
+ case T_BYTE: __ lb(result, Address(roffset, 0)); break;
+ case T_CHAR: __ lhu(result, Address(roffset, 0)); break;
+ case T_SHORT: __ lh(result, Address(roffset, 0)); break;
+ case T_INT: __ lw(result, Address(roffset, 0)); break;
+ case T_LONG: __ ld(result, Address(roffset, 0)); break;
+ case T_FLOAT: {
+      __ flw(f28, Address(roffset, 0)); // use f28 as a temporary
+ __ fmv_x_w(result, f28); // f{31--0}-->x
+ break;
+ }
+ case T_DOUBLE: {
+      __ fld(f28, Address(roffset, 0)); // use f28 as a temporary
+ __ fmv_x_d(result, f28); // d{63--0}-->x
+ break;
+ }
+ default: ShouldNotReachHere();
+ }
+
+ // Using acquire: Order JVMTI check and load of result wrt. succeeding check
+ // (LoadStore for volatile field).
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+
+ __ lw(t0, safepoint_counter_addr);
+ __ bne(rcounter, t0, slow);
+
+ switch (type) {
+ case T_FLOAT: __ fmv_w_x(f10, result); break;
+ case T_DOUBLE: __ fmv_d_x(f10, result); break;
+ default: __ mv(x10, result); break;
+ }
+ __ ret();
+
+ slowcase_entry_pclist[count++] = __ pc();
+ __ bind(slow);
+ address slow_case_addr;
+ switch (type) {
+ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
+ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break;
+ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break;
+ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break;
+ case T_INT: slow_case_addr = jni_GetIntField_addr(); break;
+ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break;
+ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break;
+ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
+ default: ShouldNotReachHere();
+ slow_case_addr = NULL; // unreachable
+ }
+
+ {
+ __ enter();
+ int32_t tmp_offset = 0;
+ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset);
+ __ jalr(x1, t0, tmp_offset);
+ __ leave();
+ __ ret();
+ }
+ __ flush();
+
+ return fast_entry;
+}
+
+
+address JNI_FastGetField::generate_fast_get_boolean_field() {
+ return generate_fast_get_int_field0(T_BOOLEAN);
+}
+
+address JNI_FastGetField::generate_fast_get_byte_field() {
+ return generate_fast_get_int_field0(T_BYTE);
+}
+
+address JNI_FastGetField::generate_fast_get_char_field() {
+ return generate_fast_get_int_field0(T_CHAR);
+}
+
+address JNI_FastGetField::generate_fast_get_short_field() {
+ return generate_fast_get_int_field0(T_SHORT);
+}
+
+address JNI_FastGetField::generate_fast_get_int_field() {
+ return generate_fast_get_int_field0(T_INT);
+}
+
+address JNI_FastGetField::generate_fast_get_long_field() {
+ return generate_fast_get_int_field0(T_LONG);
+}
+
+address JNI_FastGetField::generate_fast_get_float_field() {
+ return generate_fast_get_int_field0(T_FLOAT);
+}
+
+address JNI_FastGetField::generate_fast_get_double_field() {
+ return generate_fast_get_int_field0(T_DOUBLE);
+}
diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..83ffcc55d835525a7e58af5c3be9fecea833a2c3
--- /dev/null
+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP
+#define CPU_RISCV_JNITYPES_RISCV_HPP
+
+#include "jni.h"
+#include "memory/allStatic.hpp"
+#include "oops/oop.hpp"
+
+// This file holds platform-dependent routines used to write primitive jni
+// types to the array of arguments passed into JavaCalls::call
+
+class JNITypes : private AllStatic {
+ // These functions write a java primitive type (in native format)
+  // to a java stack slot array to be passed as an argument to JavaCalls::call.
+ // I.e., they are functionally 'push' operations if they have a 'pos'
+ // formal parameter. Note that jlong's and jdouble's are written
+ // _in reverse_ of the order in which they appear in the interpreter
+  // stack. This is because call stubs (see stubGenerator_riscv.cpp)
+ // reverse the argument list constructed by JavaCallArguments (see
+ // javaCalls.hpp).
+
+public:
+ // Ints are stored in native format in one JavaCallArgument slot at *to.
+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; }
+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; }
+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
+
+ // Longs are stored in native format in one JavaCallArgument slot at
+ // *(to+1).
+ static inline void put_long(jlong from, intptr_t *to) {
+ *(jlong*) (to + 1) = from;
+ }
+
+ static inline void put_long(jlong from, intptr_t *to, int& pos) {
+ *(jlong*) (to + 1 + pos) = from;
+ pos += 2;
+ }
+
+ static inline void put_long(jlong *from, intptr_t *to, int& pos) {
+ *(jlong*) (to + 1 + pos) = *from;
+ pos += 2;
+ }
+
+ // Oops are stored in native format in one JavaCallArgument slot at *to.
+ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); }
+ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; }
+
+ // Floats are stored in native format in one JavaCallArgument slot at *to.
+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
+
+#undef _JNI_SLOT_OFFSET
+#define _JNI_SLOT_OFFSET 1
+ // Doubles are stored in native word format in one JavaCallArgument
+ // slot at *(to+1).
+ static inline void put_double(jdouble from, intptr_t *to) {
+ *(jdouble*) (to + 1) = from;
+ }
+
+ static inline void put_double(jdouble from, intptr_t *to, int& pos) {
+ *(jdouble*) (to + 1 + pos) = from;
+ pos += 2;
+ }
+
+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
+ *(jdouble*) (to + 1 + pos) = *from;
+ pos += 2;
+ }
+
+ // The get_xxx routines, on the other hand, actually _do_ fetch
+ // java primitive types from the interpreter stack.
+  // No need to worry about alignment here: every slot is word-sized.
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; }
+ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); }
+ static inline oop get_obj (intptr_t *from) { return *(oop *) from; }
+ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; }
+ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); }
+#undef _JNI_SLOT_OFFSET
+};
+
+#endif // CPU_RISCV_JNITYPES_RISCV_HPP
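
To make the slot convention concrete: ints, floats and oops occupy one word-sized slot each,
while longs and doubles occupy two slots with the payload written into the second slot of the
pair (to + 1 + pos), matching _JNI_SLOT_OFFSET == 1 on the read side. The stand-alone check
below is a hypothetical illustration of that layout and does not use the HotSpot header.

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    intptr_t slots[8] = {0};
    int pos = 0;

    *(int32_t*)(slots + pos++) = 7;              // int: one slot, native format

    int64_t big = 0x1122334455667788LL;          // long: two slots, payload in the second
    std::memcpy(slots + pos + 1, &big, sizeof(big));
    pos += 2;

    double d = 3.5;                              // double: same two-slot convention as long
    std::memcpy(slots + pos + 1, &d, sizeof(d));
    pos += 2;

    assert(pos == 5);
    int64_t back;
    std::memcpy(&back, slots + 2, sizeof(back)); // payload sits at (pair start) + 1, as get_long expects
    assert(back == big);
    return 0;
  }
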
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d830405db21deb91f3f9a79109d3e26747a58e46
--- /dev/null
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -0,0 +1,4019 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/accessDecorators.hpp"
+#include "oops/compressedOops.inline.hpp"
+#include "oops/klass.inline.hpp"
+#include "oops/oop.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/jniHandles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.hpp"
+#include "utilities/powerOfTwo.hpp"
+#ifdef COMPILER2
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#endif
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
+
+static void pass_arg0(MacroAssembler* masm, Register arg) {
+ if (c_rarg0 != arg) {
+ assert_cond(masm != NULL);
+ masm->mv(c_rarg0, arg);
+ }
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) {
+ if (c_rarg1 != arg) {
+ assert_cond(masm != NULL);
+ masm->mv(c_rarg1, arg);
+ }
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) {
+ if (c_rarg2 != arg) {
+ assert_cond(masm != NULL);
+ masm->mv(c_rarg2, arg);
+ }
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) {
+ if (c_rarg3 != arg) {
+ assert_cond(masm != NULL);
+ masm->mv(c_rarg3, arg);
+ }
+}
+
+void MacroAssembler::align(int modulus, int extra_offset) {
+ CompressibleRegion cr(this);
+ while ((offset() + extra_offset) % modulus != 0) { nop(); }
+}
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
+}
+
+// Implementation of call_VM versions
+
+void MacroAssembler::call_VM(Register oop_result,
+ address entry_point,
+ bool check_exceptions) {
+ call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1,
+ bool check_exceptions) {
+ pass_arg1(this, arg_1);
+ call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1,
+ Register arg_2,
+ bool check_exceptions) {
+ assert(arg_1 != c_rarg2, "smashed arg");
+ pass_arg2(this, arg_2);
+ pass_arg1(this, arg_1);
+ call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1,
+ Register arg_2,
+ Register arg_3,
+ bool check_exceptions) {
+ assert(arg_1 != c_rarg3, "smashed arg");
+ assert(arg_2 != c_rarg3, "smashed arg");
+ pass_arg3(this, arg_3);
+
+ assert(arg_1 != c_rarg2, "smashed arg");
+ pass_arg2(this, arg_2);
+
+ pass_arg1(this, arg_1);
+ call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments,
+ bool check_exceptions) {
+ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1,
+ bool check_exceptions) {
+ pass_arg1(this, arg_1);
+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1,
+ Register arg_2,
+ bool check_exceptions) {
+
+ assert(arg_1 != c_rarg2, "smashed arg");
+ pass_arg2(this, arg_2);
+ pass_arg1(this, arg_1);
+ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1,
+ Register arg_2,
+ Register arg_3,
+ bool check_exceptions) {
+ assert(arg_1 != c_rarg3, "smashed arg");
+ assert(arg_2 != c_rarg3, "smashed arg");
+ pass_arg3(this, arg_3);
+ assert(arg_1 != c_rarg2, "smashed arg");
+ pass_arg2(this, arg_2);
+ pass_arg1(this, arg_1);
+ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+// these are no-ops overridden by InterpreterMacroAssembler
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
+void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
+
+// Calls to C land
+//
+// When entering C land, the fp & esp of the last Java frame have to be recorded
+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
+// has to be reset to 0. This is required to allow proper stack traversal.
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+ Register last_java_fp,
+ Register last_java_pc,
+ Register tmp) {
+
+ if (last_java_pc->is_valid()) {
+ sd(last_java_pc, Address(xthread,
+ JavaThread::frame_anchor_offset() +
+ JavaFrameAnchor::last_Java_pc_offset()));
+ }
+
+ // determine last_java_sp register
+ if (last_java_sp == sp) {
+ mv(tmp, sp);
+ last_java_sp = tmp;
+ } else if (!last_java_sp->is_valid()) {
+ last_java_sp = esp;
+ }
+
+ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));
+
+ // last_java_fp is optional
+ if (last_java_fp->is_valid()) {
+ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
+ }
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+ Register last_java_fp,
+ address last_java_pc,
+ Register tmp) {
+ assert(last_java_pc != NULL, "must provide a valid PC");
+
+ la(tmp, last_java_pc);
+ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
+
+ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+ Register last_java_fp,
+ Label &L,
+ Register tmp) {
+ if (L.is_bound()) {
+ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
+ } else {
+ InstructionMark im(this);
+ L.add_patch_at(code(), locator());
+ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
+ }
+}
+
+void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
+ // we must set sp to zero to clear frame
+ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
+
+ // must clear fp, so that compiled frames are not confused; it is
+ // possible that we need it only for debugging
+ if (clear_fp) {
+ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
+ }
+
+ // Always clear the pc because it could have been set by make_walkable()
+ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+ Register java_thread,
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments,
+ bool check_exceptions) {
+ // determine java_thread register
+ if (!java_thread->is_valid()) {
+ java_thread = xthread;
+ }
+ // determine last_java_sp register
+ if (!last_java_sp->is_valid()) {
+ last_java_sp = esp;
+ }
+
+ // debugging support
+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
+ assert(java_thread == xthread, "unexpected register");
+
+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
+
+ // push java thread (becomes first argument of C function)
+ mv(c_rarg0, java_thread);
+
+ // set last Java frame before call
+ assert(last_java_sp != fp, "can't use fp");
+
+ Label l;
+ set_last_Java_frame(last_java_sp, fp, l, t0);
+
+ // do the call, remove parameters
+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
+
+ // reset last Java frame
+ // Only interpreter should have to clear fp
+ reset_last_Java_frame(true);
+
+ // C++ interp handles this in the interpreter
+ check_and_handle_popframe(java_thread);
+ check_and_handle_earlyret(java_thread);
+
+ if (check_exceptions) {
+ // check for pending exceptions (java_thread is set upon return)
+ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
+ Label ok;
+ beqz(t0, ok);
+ int32_t offset = 0;
+ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset);
+ jalr(x0, t0, offset);
+ bind(ok);
+ }
+
+ // get oop result if there is one and reset the value in the thread
+ if (oop_result->is_valid()) {
+ get_vm_result(oop_result, java_thread);
+ }
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
+ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
+ sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
+ verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
+}
+
+void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
+ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
+ assert_different_registers(klass, xthread, tmp);
+
+ Label L_fallthrough, L_tmp;
+ if (L_fast_path == NULL) {
+ L_fast_path = &L_fallthrough;
+ } else if (L_slow_path == NULL) {
+ L_slow_path = &L_fallthrough;
+ }
+
+ // Fast path check: class is fully initialized
+ lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
+ sub(tmp, tmp, InstanceKlass::fully_initialized);
+ beqz(tmp, *L_fast_path);
+
+ // Fast path check: current thread is initializer thread
+ ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));
+
+ if (L_slow_path == &L_fallthrough) {
+ beq(xthread, tmp, *L_fast_path);
+ bind(*L_slow_path);
+ } else if (L_fast_path == &L_fallthrough) {
+ bne(xthread, tmp, *L_slow_path);
+ bind(*L_fast_path);
+ } else {
+ Unimplemented();
+ }
+}
+
+void MacroAssembler::verify_oop(Register reg, const char* s) {
+ if (!VerifyOops) { return; }
+
+ // Pass register number to verify_oop_subroutine
+ const char* b = NULL;
+ {
+ ResourceMark rm;
+ stringStream ss;
+ ss.print("verify_oop: %s: %s", reg->name(), s);
+ b = code_string(ss.as_string());
+ }
+ BLOCK_COMMENT("verify_oop {");
+
+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
+
+ mv(c_rarg0, reg); // c_rarg0 : x10
+ li(t0, (uintptr_t)(address)b);
+
+ // call indirectly to solve generation ordering problem
+ int32_t offset = 0;
+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
+ ld(t1, Address(t1, offset));
+ jalr(t1);
+
+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
+
+ BLOCK_COMMENT("} verify_oop");
+}
+
+void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
+ if (!VerifyOops) {
+ return;
+ }
+
+ const char* b = NULL;
+ {
+ ResourceMark rm;
+ stringStream ss;
+ ss.print("verify_oop_addr: %s", s);
+ b = code_string(ss.as_string());
+ }
+ BLOCK_COMMENT("verify_oop_addr {");
+
+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
+
+ if (addr.uses(sp)) {
+ la(x10, addr);
+ ld(x10, Address(x10, 4 * wordSize));
+ } else {
+ ld(x10, addr);
+ }
+
+ li(t0, (uintptr_t)(address)b);
+
+ // call indirectly to solve generation ordering problem
+ int32_t offset = 0;
+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
+ ld(t1, Address(t1, offset));
+ jalr(t1);
+
+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
+
+ BLOCK_COMMENT("} verify_oop_addr");
+}
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+ int extra_slot_offset) {
+ // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+ int stackElementSize = Interpreter::stackElementSize;
+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+ assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+ if (arg_slot.is_constant()) {
+ return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
+ } else {
+ assert_different_registers(t0, arg_slot.as_register());
+ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
+ return Address(t0, offset);
+ }
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
+{
+  // In order to get locks to work, we need to fake an in_VM state
+ if (ShowMessageBoxOnError) {
+ JavaThread* thread = JavaThread::current();
+ JavaThreadState saved_state = thread->thread_state();
+ thread->set_thread_state(_thread_in_vm);
+#ifndef PRODUCT
+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+ ttyLocker ttyl;
+ BytecodeCounter::print();
+ }
+#endif
+ if (os::message_box(msg, "Execution stopped, print registers?")) {
+ ttyLocker ttyl;
+ tty->print_cr(" pc = 0x%016lx", pc);
+#ifndef PRODUCT
+ tty->cr();
+ findpc(pc);
+ tty->cr();
+#endif
+ tty->print_cr(" x0 = 0x%016lx", regs[0]);
+ tty->print_cr(" x1 = 0x%016lx", regs[1]);
+ tty->print_cr(" x2 = 0x%016lx", regs[2]);
+ tty->print_cr(" x3 = 0x%016lx", regs[3]);
+ tty->print_cr(" x4 = 0x%016lx", regs[4]);
+ tty->print_cr(" x5 = 0x%016lx", regs[5]);
+ tty->print_cr(" x6 = 0x%016lx", regs[6]);
+ tty->print_cr(" x7 = 0x%016lx", regs[7]);
+ tty->print_cr(" x8 = 0x%016lx", regs[8]);
+ tty->print_cr(" x9 = 0x%016lx", regs[9]);
+ tty->print_cr("x10 = 0x%016lx", regs[10]);
+ tty->print_cr("x11 = 0x%016lx", regs[11]);
+ tty->print_cr("x12 = 0x%016lx", regs[12]);
+ tty->print_cr("x13 = 0x%016lx", regs[13]);
+ tty->print_cr("x14 = 0x%016lx", regs[14]);
+ tty->print_cr("x15 = 0x%016lx", regs[15]);
+ tty->print_cr("x16 = 0x%016lx", regs[16]);
+ tty->print_cr("x17 = 0x%016lx", regs[17]);
+ tty->print_cr("x18 = 0x%016lx", regs[18]);
+ tty->print_cr("x19 = 0x%016lx", regs[19]);
+ tty->print_cr("x20 = 0x%016lx", regs[20]);
+ tty->print_cr("x21 = 0x%016lx", regs[21]);
+ tty->print_cr("x22 = 0x%016lx", regs[22]);
+ tty->print_cr("x23 = 0x%016lx", regs[23]);
+ tty->print_cr("x24 = 0x%016lx", regs[24]);
+ tty->print_cr("x25 = 0x%016lx", regs[25]);
+ tty->print_cr("x26 = 0x%016lx", regs[26]);
+ tty->print_cr("x27 = 0x%016lx", regs[27]);
+ tty->print_cr("x28 = 0x%016lx", regs[28]);
+ tty->print_cr("x30 = 0x%016lx", regs[30]);
+ tty->print_cr("x31 = 0x%016lx", regs[31]);
+ BREAKPOINT;
+ }
+ }
+ fatal("DEBUG MESSAGE: %s", msg);
+}
+
+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) {
+ Label done, not_weak;
+ beqz(value, done); // Use NULL as-is.
+
+ // Test for jweak tag.
+ andi(t0, value, JNIHandles::weak_tag_mask);
+ beqz(t0, not_weak);
+
+ // Resolve jweak.
+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
+ Address(value, -JNIHandles::weak_tag_value), tmp, thread);
+ verify_oop(value);
+ j(done);
+
+ bind(not_weak);
+ // Resolve (untagged) jobject.
+ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
+ verify_oop(value);
+ bind(done);
+}
+
+void MacroAssembler::stop(const char* msg) {
+ address ip = pc();
+ pusha();
+ // The length of the instruction sequence emitted should be independent
+ // of the values of msg and ip so that the size of mach nodes for scratch
+ // emit and normal emit matches.
+ mv(c_rarg0, (address)msg);
+ mv(c_rarg1, (address)ip);
+ mv(c_rarg2, sp);
+ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
+ jalr(c_rarg3);
+ ebreak();
+}
+
+void MacroAssembler::unimplemented(const char* what) {
+ const char* buf = NULL;
+ {
+ ResourceMark rm;
+ stringStream ss;
+ ss.print("unimplemented: %s", what);
+ buf = code_string(ss.as_string());
+ }
+ stop(buf);
+}
+
+void MacroAssembler::emit_static_call_stub() {
+ // CompiledDirectStaticCall::set_to_interpreted knows the
+ // exact layout of this stub.
+
+ ifence();
+ mov_metadata(xmethod, (Metadata*)NULL);
+
+ // Jump to the entry point of the i2c stub.
+ int32_t offset = 0;
+ movptr_with_offset(t0, 0, offset);
+ jalr(x0, t0, offset);
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point,
+ int number_of_arguments,
+ Label *retaddr) {
+ call_native_base(entry_point, retaddr);
+}
+
+void MacroAssembler::call_native(address entry_point, Register arg_0) {
+ pass_arg0(this, arg_0);
+ call_native_base(entry_point);
+}
+
+void MacroAssembler::call_native_base(address entry_point, Label *retaddr) {
+ Label E, L;
+ int32_t offset = 0;
+ push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp
+ movptr_with_offset(t0, entry_point, offset);
+ jalr(x1, t0, offset);
+ if (retaddr != NULL) {
+ bind(*retaddr);
+ }
+ pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
+ call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
+ pass_arg0(this, arg_0);
+ call_VM_leaf_base(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+ pass_arg0(this, arg_0);
+ pass_arg1(this, arg_1);
+ call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
+ Register arg_1, Register arg_2) {
+ pass_arg0(this, arg_0);
+ pass_arg1(this, arg_1);
+ pass_arg2(this, arg_2);
+ call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
+ pass_arg0(this, arg_0);
+ MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+
+ assert(arg_0 != c_rarg1, "smashed arg");
+ pass_arg1(this, arg_1);
+ pass_arg0(this, arg_0);
+ MacroAssembler::call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+ assert(arg_0 != c_rarg2, "smashed arg");
+ assert(arg_1 != c_rarg2, "smashed arg");
+ pass_arg2(this, arg_2);
+ assert(arg_0 != c_rarg1, "smashed arg");
+ pass_arg1(this, arg_1);
+ pass_arg0(this, arg_0);
+ MacroAssembler::call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
+ assert(arg_0 != c_rarg3, "smashed arg");
+ assert(arg_1 != c_rarg3, "smashed arg");
+ assert(arg_2 != c_rarg3, "smashed arg");
+ pass_arg3(this, arg_3);
+ assert(arg_0 != c_rarg2, "smashed arg");
+ assert(arg_1 != c_rarg2, "smashed arg");
+ pass_arg2(this, arg_2);
+ assert(arg_0 != c_rarg1, "smashed arg");
+ pass_arg1(this, arg_1);
+ pass_arg0(this, arg_0);
+ MacroAssembler::call_VM_leaf_base(entry_point, 4);
+}
+
+void MacroAssembler::nop() {
+ addi(x0, x0, 0);
+}
+
+void MacroAssembler::mv(Register Rd, Register Rs) {
+ if (Rd != Rs) {
+ addi(Rd, Rs, 0);
+ }
+}
+
+void MacroAssembler::notr(Register Rd, Register Rs) {
+ xori(Rd, Rs, -1);
+}
+
+void MacroAssembler::neg(Register Rd, Register Rs) {
+ sub(Rd, x0, Rs);
+}
+
+void MacroAssembler::negw(Register Rd, Register Rs) {
+ subw(Rd, x0, Rs);
+}
+
+void MacroAssembler::sext_w(Register Rd, Register Rs) {
+ addiw(Rd, Rs, 0);
+}
+
+void MacroAssembler::zext_b(Register Rd, Register Rs) {
+ andi(Rd, Rs, 0xFF);
+}
+
+void MacroAssembler::seqz(Register Rd, Register Rs) {
+ sltiu(Rd, Rs, 1);
+}
+
+void MacroAssembler::snez(Register Rd, Register Rs) {
+ sltu(Rd, x0, Rs);
+}
+
+void MacroAssembler::sltz(Register Rd, Register Rs) {
+ slt(Rd, Rs, x0);
+}
+
+void MacroAssembler::sgtz(Register Rd, Register Rs) {
+ slt(Rd, x0, Rs);
+}
+
+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) {
+ if (Rd != Rs) {
+ fsgnj_s(Rd, Rs, Rs);
+ }
+}
+
+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) {
+ fsgnjx_s(Rd, Rs, Rs);
+}
+
+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) {
+ fsgnjn_s(Rd, Rs, Rs);
+}
+
+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) {
+ if (Rd != Rs) {
+ fsgnj_d(Rd, Rs, Rs);
+ }
+}
+
+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) {
+ fsgnjx_d(Rd, Rs, Rs);
+}
+
+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) {
+ fsgnjn_d(Rd, Rs, Rs);
+}
+
+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) {
+ vmnand_mm(vd, vs, vs);
+}
+
+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
+ vnsrl_wx(vd, vs, x0, vm);
+}
+
+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) {
+ vfsgnjn_vv(vd, vs, vs);
+}
+
+void MacroAssembler::la(Register Rd, const address &dest) {
+ int64_t offset = dest - pc();
+ if (is_offset_in_range(offset, 32)) {
+    auipc(Rd, (int32_t)offset + 0x800);  // add 0x800 to compensate for the sign extension of the low 12 bits (bit 11)
+ addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
+ } else {
+ movptr(Rd, dest);
+ }
+}
+
+void MacroAssembler::la(Register Rd, const Address &adr) {
+ InstructionMark im(this);
+ code_section()->relocate(inst_mark(), adr.rspec());
+ relocInfo::relocType rtype = adr.rspec().reloc()->type();
+
+ switch (adr.getMode()) {
+ case Address::literal: {
+ if (rtype == relocInfo::none) {
+ li(Rd, (intptr_t)(adr.target()));
+ } else {
+ movptr(Rd, adr.target());
+ }
+ break;
+ }
+ case Address::base_plus_offset: {
+ int32_t offset = 0;
+ baseOffset(Rd, adr, offset);
+ addi(Rd, Rd, offset);
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void MacroAssembler::la(Register Rd, Label &label) {
+ la(Rd, target(label));
+}
+
+#define INSN(NAME) \
+ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \
+ NAME(Rs, zr, dest); \
+ } \
+ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \
+ NAME(Rs, zr, l, is_far); \
+ } \
+
+ INSN(beq);
+ INSN(bne);
+ INSN(blt);
+ INSN(ble);
+ INSN(bge);
+ INSN(bgt);
+
+#undef INSN
+
+// Float compare branch instructions
+
+#define INSN(NAME, FLOATCMP, BRANCH) \
+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
+ FLOATCMP##_s(t0, Rs1, Rs2); \
+ BRANCH(t0, l, is_far); \
+ } \
+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
+ FLOATCMP##_d(t0, Rs1, Rs2); \
+ BRANCH(t0, l, is_far); \
+ }
+
+ INSN(beq, feq, bnez);
+ INSN(bne, feq, beqz);
+
+#undef INSN
+
+
+#define INSN(NAME, FLOATCMP1, FLOATCMP2) \
+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
+ bool is_far, bool is_unordered) { \
+ if (is_unordered) { \
+ /* jump if either source is NaN or condition is expected */ \
+ FLOATCMP2##_s(t0, Rs2, Rs1); \
+ beqz(t0, l, is_far); \
+ } else { \
+ /* jump if no NaN in source and condition is expected */ \
+ FLOATCMP1##_s(t0, Rs1, Rs2); \
+ bnez(t0, l, is_far); \
+ } \
+ } \
+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
+ bool is_far, bool is_unordered) { \
+ if (is_unordered) { \
+ /* jump if either source is NaN or condition is expected */ \
+ FLOATCMP2##_d(t0, Rs2, Rs1); \
+ beqz(t0, l, is_far); \
+ } else { \
+ /* jump if no NaN in source and condition is expected */ \
+ FLOATCMP1##_d(t0, Rs1, Rs2); \
+ bnez(t0, l, is_far); \
+ } \
+ }
+
+ INSN(ble, fle, flt);
+ INSN(blt, flt, fle);
+
+#undef INSN
+
+#define INSN(NAME, CMP) \
+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
+ bool is_far, bool is_unordered) { \
+ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
+ } \
+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
+ bool is_far, bool is_unordered) { \
+ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
+ }
+
+ INSN(bgt, blt);
+ INSN(bge, ble);
+
+#undef INSN
+
+
+#define INSN(NAME, CSR) \
+ void MacroAssembler::NAME(Register Rd) { \
+ csrr(Rd, CSR); \
+ }
+
+ INSN(rdinstret, CSR_INSTERT);
+ INSN(rdcycle, CSR_CYCLE);
+ INSN(rdtime, CSR_TIME);
+ INSN(frcsr, CSR_FCSR);
+ INSN(frrm, CSR_FRM);
+ INSN(frflags, CSR_FFLAGS);
+
+#undef INSN
+
+void MacroAssembler::csrr(Register Rd, unsigned csr) {
+ csrrs(Rd, csr, x0);
+}
+
+#define INSN(NAME, OPFUN) \
+ void MacroAssembler::NAME(unsigned csr, Register Rs) { \
+ OPFUN(x0, csr, Rs); \
+ }
+
+ INSN(csrw, csrrw);
+ INSN(csrs, csrrs);
+ INSN(csrc, csrrc);
+
+#undef INSN
+
+#define INSN(NAME, OPFUN) \
+ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \
+ OPFUN(x0, csr, imm); \
+ }
+
+ INSN(csrwi, csrrwi);
+ INSN(csrsi, csrrsi);
+ INSN(csrci, csrrci);
+
+#undef INSN
+
+#define INSN(NAME, CSR) \
+ void MacroAssembler::NAME(Register Rd, Register Rs) { \
+ csrrw(Rd, CSR, Rs); \
+ }
+
+ INSN(fscsr, CSR_FCSR);
+ INSN(fsrm, CSR_FRM);
+ INSN(fsflags, CSR_FFLAGS);
+
+#undef INSN
+
+#define INSN(NAME) \
+ void MacroAssembler::NAME(Register Rs) { \
+ NAME(x0, Rs); \
+ }
+
+ INSN(fscsr);
+ INSN(fsrm);
+ INSN(fsflags);
+
+#undef INSN
+
+void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
+ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
+ csrrwi(Rd, CSR_FRM, imm);
+}
+
+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
+ csrrwi(Rd, CSR_FFLAGS, imm);
+}
+
+#define INSN(NAME) \
+ void MacroAssembler::NAME(unsigned imm) { \
+ NAME(x0, imm); \
+ }
+
+ INSN(fsrmi);
+ INSN(fsflagsi);
+
+#undef INSN
+
+void MacroAssembler::push_reg(Register Rs)
+{
+ addi(esp, esp, 0 - wordSize);
+ sd(Rs, Address(esp, 0));
+}
+
+void MacroAssembler::pop_reg(Register Rd)
+{
+ ld(Rd, esp, 0);
+ addi(esp, esp, wordSize);
+}
+
+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
+ int count = 0;
+ // Scan bitset to accumulate register pairs
+ for (int reg = 31; reg >= 0; reg--) {
+ if ((1U << 31) & bitset) {
+ regs[count++] = reg;
+ }
+ bitset <<= 1;
+ }
+ return count;
+}
+
+// Push lots of registers in the bit set supplied. Don't push sp.
+// Return the number of words pushed
+int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
+ DEBUG_ONLY(int words_pushed = 0;)
+ CompressibleRegion cr(this);
+
+ unsigned char regs[32];
+ int count = bitset_to_regs(bitset, regs);
+ // reserve one slot to align for odd count
+ int offset = is_even(count) ? 0 : wordSize;
+
+ if (count) {
+ addi(stack, stack, - count * wordSize - offset);
+ }
+ for (int i = count - 1; i >= 0; i--) {
+ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
+ DEBUG_ONLY(words_pushed ++;)
+ }
+
+ assert(words_pushed == count, "oops, pushed != count");
+
+ return count;
+}
+
+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
+ DEBUG_ONLY(int words_popped = 0;)
+ CompressibleRegion cr(this);
+
+ unsigned char regs[32];
+ int count = bitset_to_regs(bitset, regs);
+ // reserve one slot to align for odd count
+ int offset = is_even(count) ? 0 : wordSize;
+
+ for (int i = count - 1; i >= 0; i--) {
+ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
+ DEBUG_ONLY(words_popped ++;)
+ }
+
+ if (count) {
+ addi(stack, stack, count * wordSize + offset);
+ }
+ assert(words_popped == count, "oops, popped != count");
+
+ return count;
+}
+
+// Push the float registers in the bit set supplied.
+// Return the number of registers pushed.
+int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
+ CompressibleRegion cr(this);
+ int words_pushed = 0;
+ unsigned char regs[32];
+ int count = bitset_to_regs(bitset, regs);
+ int push_slots = count + (count & 1);
+
+ if (count) {
+ addi(stack, stack, -push_slots * wordSize);
+ }
+
+ for (int i = count - 1; i >= 0; i--) {
+ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
+ words_pushed++;
+ }
+
+ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
+ return count;
+}
+
+int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
+ CompressibleRegion cr(this);
+ int words_popped = 0;
+ unsigned char regs[32];
+ int count = bitset_to_regs(bitset, regs);
+ int pop_slots = count + (count & 1);
+
+ for (int i = count - 1; i >= 0; i--) {
+ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
+ words_popped++;
+ }
+
+ if (count) {
+ addi(stack, stack, pop_slots * wordSize);
+ }
+
+ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
+ return count;
+}
+
+#ifdef COMPILER2
+int MacroAssembler::push_vp(unsigned int bitset, Register stack) {
+ CompressibleRegion cr(this);
+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+
+ // Scan bitset to accumulate register pairs
+ unsigned char regs[32];
+ int count = 0;
+ for (int reg = 31; reg >= 0; reg--) {
+ if ((1U << 31) & bitset) {
+ regs[count++] = reg;
+ }
+ bitset <<= 1;
+ }
+
+ for (int i = 0; i < count; i++) {
+ sub(stack, stack, vector_size_in_bytes);
+ vs1r_v(as_VectorRegister(regs[i]), stack);
+ }
+
+ return count * vector_size_in_bytes / wordSize;
+}
+
+int MacroAssembler::pop_vp(unsigned int bitset, Register stack) {
+ CompressibleRegion cr(this);
+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+
+ // Scan bitset to accumulate register pairs
+ unsigned char regs[32];
+ int count = 0;
+ for (int reg = 31; reg >= 0; reg--) {
+ if ((1U << 31) & bitset) {
+ regs[count++] = reg;
+ }
+ bitset <<= 1;
+ }
+
+ for (int i = count - 1; i >= 0; i--) {
+ vl1r_v(as_VectorRegister(regs[i]), stack);
+ add(stack, stack, vector_size_in_bytes);
+ }
+
+ return count * vector_size_in_bytes / wordSize;
+}
+#endif // COMPILER2
+
+void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
+ CompressibleRegion cr(this);
+ // Push integer registers x7, x10-x17, x28-x31.
+ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
+
+ // Push float registers f0-f7, f10-f17, f28-f31.
+ addi(sp, sp, - wordSize * 20);
+ int offset = 0;
+ for (int i = 0; i < 32; i++) {
+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
+ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++)));
+ }
+ }
+}
+
+void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
+ CompressibleRegion cr(this);
+ int offset = 0;
+ for (int i = 0; i < 32; i++) {
+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
+ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++)));
+ }
+ }
+ addi(sp, sp, wordSize * 20);
+
+ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
+}
+
+// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
+void MacroAssembler::pusha() {
+ CompressibleRegion cr(this);
+ push_reg(0xffffffe2, sp);
+}
+
+// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
+void MacroAssembler::popa() {
+ CompressibleRegion cr(this);
+ pop_reg(0xffffffe2, sp);
+}
+
+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
+ CompressibleRegion cr(this);
+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
+ push_reg(0xffffffe0, sp);
+
+ // float registers
+ addi(sp, sp, - 32 * wordSize);
+ for (int i = 0; i < 32; i++) {
+ fsd(as_FloatRegister(i), Address(sp, i * wordSize));
+ }
+
+ // vector registers
+ if (save_vectors) {
+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers);
+ vsetvli(t0, x0, Assembler::e64, Assembler::m8);
+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
+ add(t0, sp, vector_size_in_bytes * i);
+ vse64_v(as_VectorRegister(i), t0);
+ }
+ }
+}
+
+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
+ CompressibleRegion cr(this);
+ // vector registers
+ if (restore_vectors) {
+ vsetvli(t0, x0, Assembler::e64, Assembler::m8);
+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
+ vle64_v(as_VectorRegister(i), sp);
+ add(sp, sp, vector_size_in_bytes * 8);
+ }
+ }
+
+ // float registers
+ for (int i = 0; i < 32; i++) {
+ fld(as_FloatRegister(i), Address(sp, i * wordSize));
+ }
+ addi(sp, sp, 32 * wordSize);
+
+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
+ pop_reg(0xffffffe0, sp);
+}
+
+static int patch_offset_in_jal(address branch, int64_t offset) {
+  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
+ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31]
+ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21]
+ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20]
+ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12]
+ return NativeInstruction::instruction_size; // only one instruction
+}
+
+static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
+  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
+ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31]
+ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25]
+ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7]
+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8]
+ return NativeInstruction::instruction_size; // only one instruction
+}
+
+static int patch_offset_in_pc_relative(address branch, int64_t offset) {
+ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load
+ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12]
+ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20]
+ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
+}
+
+static int patch_addr_in_movptr(address branch, address target) {
+ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load
+ int32_t lower = ((intptr_t)target << 36) >> 36;
+ int64_t upper = ((intptr_t)target - lower) >> 28;
+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12]
+ Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20]
+ Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20]
+ Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20]
+ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
+
+static int patch_imm_in_li64(address branch, address target) {
+ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi
+ int64_t lower = (intptr_t)target & 0xffffffff;
+ lower = lower - ((lower << 44) >> 44);
+ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
+ int64_t tmp_upper = upper, tmp_lower = upper;
+ tmp_lower = (tmp_lower << 52) >> 52;
+ tmp_upper -= tmp_lower;
+ tmp_upper >>= 12;
+  // Load the upper 32 bits. upper = target[63:32], but if target[31] == 1 or
+  // (target[31:20] == 0x7ff && target[19] == 1), upper = target[63:32] + 1.
+ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui.
+ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi.
+  // Load the remaining 32 bits.
+ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi.
+ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi.
+ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi.
+ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
+
+static int patch_imm_in_li32(address branch, int32_t target) {
+ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw
+ int64_t upper = (intptr_t)target;
+ int32_t lower = (((int32_t)target) << 20) >> 20;
+ upper -= lower;
+ upper = (int32_t)upper;
+ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui.
+ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw.
+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
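
The li32 patch above relies on the standard lui/addiw decomposition: the low 12 bits become a
sign-extended addiw immediate, and the upper 20 bits are adjusted for the borrow that this sign
extension introduces (the same idea as the +0x800 adjustment used with auipc). The check below
is a small hypothetical sketch of that arithmetic, not HotSpot code.

  #include <cassert>
  #include <cstdint>

  // Split a 32-bit immediate into a lui part (upper 20 bits) and a sign-extended
  // 12-bit addiw part, compensating the upper part for the sign extension.
  static void split_imm32(int32_t target, int32_t& upper20, int32_t& lower12) {
    lower12 = (int32_t)((uint32_t)target << 20) >> 20;  // sign-extend the low 12 bits
    upper20 = (target - lower12) >> 12;                 // borrow-adjusted upper bits
  }

  int main() {
    for (int64_t t = -70000; t <= 70000; t += 7) {
      int32_t up = 0, lo = 0;
      split_imm32((int32_t)t, up, lo);
      int32_t rebuilt = (int32_t)((uint32_t)up << 12) + lo;  // lui then addiw
      assert(rebuilt == (int32_t)t);
    }
    return 0;
  }
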
+
+static long get_offset_of_jal(address insn_addr) {
+ assert_cond(insn_addr != NULL);
+ long offset = 0;
+ unsigned insn = *(unsigned*)insn_addr;
+ long val = (long)Assembler::sextract(insn, 31, 12);
+ offset |= ((val >> 19) & 0x1) << 20;
+ offset |= (val & 0xff) << 12;
+ offset |= ((val >> 8) & 0x1) << 11;
+ offset |= ((val >> 9) & 0x3ff) << 1;
+ offset = (offset << 43) >> 43;
+ return offset;
+}
+
+static long get_offset_of_conditional_branch(address insn_addr) {
+ long offset = 0;
+ assert_cond(insn_addr != NULL);
+ unsigned insn = *(unsigned*)insn_addr;
+ offset = (long)Assembler::sextract(insn, 31, 31);
+ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
+ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
+ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
+ offset = (offset << 41) >> 41;
+ return offset;
+}
+
+static long get_offset_of_pc_relative(address insn_addr) {
+ long offset = 0;
+ assert_cond(insn_addr != NULL);
+ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc.
+ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load.
+ offset = (offset << 32) >> 32;
+ return offset;
+}
+
+static address get_target_of_movptr(address insn_addr) {
+ assert_cond(insn_addr != NULL);
+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load.
+ return (address) target_address;
+}
+
+static address get_target_of_li64(address insn_addr) {
+ assert_cond(insn_addr != NULL);
+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi.
+ return (address)target_address;
+}
+
+static address get_target_of_li32(address insn_addr) {
+ assert_cond(insn_addr != NULL);
+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui.
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw.
+ return (address)target_address;
+}
+
+// Patch any kind of instruction; there may be several instructions.
+// Return the total length (in bytes) of the instructions.
+int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
+ assert_cond(branch != NULL);
+ int64_t offset = target - branch;
+ if (NativeInstruction::is_jal_at(branch)) { // jal
+ return patch_offset_in_jal(branch, offset);
+ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne
+ return patch_offset_in_conditional_branch(branch, offset);
+ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load
+ return patch_offset_in_pc_relative(branch, offset);
+ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr
+ return patch_addr_in_movptr(branch, target);
+ } else if (NativeInstruction::is_li64_at(branch)) { // li64
+ return patch_imm_in_li64(branch, target);
+ } else if (NativeInstruction::is_li32_at(branch)) { // li32
+ int64_t imm = (intptr_t)target;
+ return patch_imm_in_li32(branch, (int32_t)imm);
+ } else {
+#ifdef ASSERT
+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
+ *(unsigned*)branch, p2i(branch));
+ Disassembler::decode(branch - 16, branch + 16);
+#endif
+ ShouldNotReachHere();
+ return -1;
+ }
+}
+
+address MacroAssembler::target_addr_for_insn(address insn_addr) {
+ long offset = 0;
+ assert_cond(insn_addr != NULL);
+ if (NativeInstruction::is_jal_at(insn_addr)) { // jal
+ offset = get_offset_of_jal(insn_addr);
+ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne
+ offset = get_offset_of_conditional_branch(insn_addr);
+ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load
+ offset = get_offset_of_pc_relative(insn_addr);
+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr
+ return get_target_of_movptr(insn_addr);
+ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64
+ return get_target_of_li64(insn_addr);
+ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32
+ return get_target_of_li32(insn_addr);
+ } else {
+ ShouldNotReachHere();
+ }
+ return address(((uintptr_t)insn_addr + offset));
+}
+
+int MacroAssembler::patch_oop(address insn_addr, address o) {
+ // OOPs are either narrow (32 bits) or wide (48 bits). We encode
+ // narrow OOPs by setting the upper 16 bits in the first
+ // instruction.
+ if (NativeInstruction::is_li32_at(insn_addr)) {
+ // Move narrow OOP
+ uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
+ return patch_imm_in_li32(insn_addr, (int32_t)n);
+ } else if (NativeInstruction::is_movptr_at(insn_addr)) {
+ // Move wide OOP
+ return patch_addr_in_movptr(insn_addr, o);
+ }
+ ShouldNotReachHere();
+ return -1;
+}
+
+void MacroAssembler::reinit_heapbase() {
+ if (UseCompressedOops) {
+ if (Universe::is_fully_initialized()) {
+ mv(xheapbase, CompressedOops::ptrs_base());
+ } else {
+ int32_t offset = 0;
+ la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset);
+ ld(xheapbase, Address(xheapbase, offset));
+ }
+ }
+}
+
+void MacroAssembler::mv(Register Rd, Address dest) {
+ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
+ code_section()->relocate(pc(), dest.rspec());
+ movptr(Rd, dest.target());
+}
+
+void MacroAssembler::mv(Register Rd, address addr) {
+ // Here in case of use with relocation, use the fixed-length instruction
+ // sequence movptr instead of li
+ movptr(Rd, addr);
+}
+
+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) {
+ if (src.is_register()) {
+ mv(Rd, src.as_register());
+ } else {
+ mv(Rd, src.as_constant());
+ }
+}
+
+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
+ andr(Rd, Rs1, Rs2);
+ // addw: The result is clipped to 32 bits, then the sign bit is extended,
+ // and the result is stored in Rd
+ addw(Rd, Rd, zr);
+}
+
+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
+ orr(Rd, Rs1, Rs2);
+ // addw: The result is clipped to 32 bits, then the sign bit is extended,
+ // and the result is stored in Rd
+ addw(Rd, Rd, zr);
+}
+
+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
+ xorr(Rd, Rs1, Rs2);
+ // addw: The result is clipped to 32 bits, then the sign bit is extended,
+ // and the result is stored in Rd
+ addw(Rd, Rd, zr);
+}
+
+// Note: load_unsigned_short used to be called load_unsigned_word.
+int MacroAssembler::load_unsigned_short(Register dst, Address src) {
+ int off = offset();
+ lhu(dst, src);
+ return off;
+}
+
+int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
+ int off = offset();
+ lbu(dst, src);
+ return off;
+}
+
+int MacroAssembler::load_signed_short(Register dst, Address src) {
+ int off = offset();
+ lh(dst, src);
+ return off;
+}
+
+int MacroAssembler::load_signed_byte(Register dst, Address src) {
+ int off = offset();
+ lb(dst, src);
+ return off;
+}
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+ switch (size_in_bytes) {
+ case 8: ld(dst, src); break;
+ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break;
+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
+ default: ShouldNotReachHere();
+ }
+}
+
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+ switch (size_in_bytes) {
+ case 8: sd(src, dst); break;
+ case 4: sw(src, dst); break;
+ case 2: sh(src, dst); break;
+ case 1: sb(src, dst); break;
+ default: ShouldNotReachHere();
+ }
+}
+
+// reverse bytes in halfword in lower 16 bits and sign-extend
+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
+ if (UseZbb) {
+ rev8(Rd, Rs);
+ srai(Rd, Rd, 48);
+ return;
+ }
+ assert_different_registers(Rs, tmp);
+ assert_different_registers(Rd, tmp);
+ srli(tmp, Rs, 8);
+ andi(tmp, tmp, 0xFF);
+ slli(Rd, Rs, 56);
+ srai(Rd, Rd, 48); // sign-extend
+ orr(Rd, Rd, tmp);
+}
+
+// reverse bytes in lower word and sign-extend
+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
+void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+ if (UseZbb) {
+ rev8(Rd, Rs);
+ srai(Rd, Rd, 32);
+ return;
+ }
+ assert_different_registers(Rs, tmp1, tmp2);
+ assert_different_registers(Rd, tmp1, tmp2);
+ revb_h_w_u(Rd, Rs, tmp1, tmp2);
+ slli(tmp2, Rd, 48);
+ srai(tmp2, tmp2, 32); // sign-extend
+ srli(Rd, Rd, 16);
+ orr(Rd, Rd, tmp2);
+}
+
+// reverse bytes in halfword in lower 16 bits and zero-extend
+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
+ if (UseZbb) {
+ rev8(Rd, Rs);
+ srli(Rd, Rd, 48);
+ return;
+ }
+ assert_different_registers(Rs, tmp);
+ assert_different_registers(Rd, tmp);
+ srli(tmp, Rs, 8);
+ andi(tmp, tmp, 0xFF);
+ andi(Rd, Rs, 0xFF);
+ slli(Rd, Rd, 8);
+ orr(Rd, Rd, tmp);
+}
+
+// reverse bytes in halfwords in lower 32 bits and zero-extend
+// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+ if (UseZbb) {
+ rev8(Rd, Rs);
+ rori(Rd, Rd, 32);
+ roriw(Rd, Rd, 16);
+ zero_extend(Rd, Rd, 32);
+ return;
+ }
+ assert_different_registers(Rs, tmp1, tmp2);
+ assert_different_registers(Rd, tmp1, tmp2);
+ srli(tmp2, Rs, 16);
+ revb_h_h_u(tmp2, tmp2, tmp1);
+ revb_h_h_u(Rd, Rs, tmp1);
+ slli(tmp2, tmp2, 16);
+ orr(Rd, Rd, tmp2);
+}
+
+// This method is only used for revb_h
+// Rd = Rs[47:0] Rs[55:48] Rs[63:56]
+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+ assert_different_registers(Rs, tmp1, tmp2);
+ assert_different_registers(Rd, tmp1);
+ srli(tmp1, Rs, 48);
+ andi(tmp2, tmp1, 0xFF);
+ slli(tmp2, tmp2, 8);
+ srli(tmp1, tmp1, 8);
+ orr(tmp1, tmp1, tmp2);
+ slli(Rd, Rs, 16);
+ orr(Rd, Rd, tmp1);
+}
+
+// reverse bytes in each halfword
+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+ if (UseZbb) {
+ assert_different_registers(Rs, tmp1);
+ assert_different_registers(Rd, tmp1);
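+ // rev8 reverses all eight bytes; rotating each 32-bit half by 16 and putting
+ // the halves back in place leaves only the bytes within each halfword swapped.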
+ rev8(Rd, Rs);
+ zero_extend(tmp1, Rd, 32);
+ roriw(tmp1, tmp1, 16);
+ slli(tmp1, tmp1, 32);
+ srli(Rd, Rd, 32);
+ roriw(Rd, Rd, 16);
+ zero_extend(Rd, Rd, 32);
+ orr(Rd, Rd, tmp1);
+ return;
+ }
+ assert_different_registers(Rs, tmp1, tmp2);
+ assert_different_registers(Rd, tmp1, tmp2);
+ revb_h_helper(Rd, Rs, tmp1, tmp2);
+ for (int i = 0; i < 3; ++i) {
+ revb_h_helper(Rd, Rd, tmp1, tmp2);
+ }
+}
+
+// reverse bytes in each word
+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+ if (UseZbb) {
+ rev8(Rd, Rs);
+ rori(Rd, Rd, 32);
+ return;
+ }
+ assert_different_registers(Rs, tmp1, tmp2);
+ assert_different_registers(Rd, tmp1, tmp2);
+ revb(Rd, Rs, tmp1, tmp2);
+ ror_imm(Rd, Rd, 32);
+}
+
+// reverse bytes in doubleword
+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56]
+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+ if (UseZbb) {
+ rev8(Rd, Rs);
+ return;
+ }
+ assert_different_registers(Rs, tmp1, tmp2);
+ assert_different_registers(Rd, tmp1, tmp2);
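+ // Without Zbb, reverse one byte per iteration: shift the accumulator left by 8
+ // and OR in the next byte of Rs from low to high; Rs's top byte ends up in
+ // Rd's lowest byte.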
+ andi(tmp1, Rs, 0xFF);
+ slli(tmp1, tmp1, 8);
+ for (int step = 8; step < 56; step += 8) {
+ srli(tmp2, Rs, step);
+ andi(tmp2, tmp2, 0xFF);
+ orr(tmp1, tmp1, tmp2);
+ slli(tmp1, tmp1, 8);
+ }
+ srli(Rd, Rs, 56);
+ andi(Rd, Rd, 0xFF);
+ orr(Rd, tmp1, Rd);
+}
+
+// rotate right with shift bits
+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
+{
+ if (UseZbb) {
+ rori(dst, src, shift);
+ return;
+ }
+
+ assert_different_registers(dst, tmp);
+ assert_different_registers(src, tmp);
+ assert(shift < 64, "shift amount must be < 64");
+ slli(tmp, src, 64 - shift);
+ srli(dst, src, shift);
+ orr(dst, dst, tmp);
+}
+
+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
+ if (is_imm_in_range(imm, 12, 0)) {
+ and_imm12(Rd, Rn, imm);
+ } else {
+ assert_different_registers(Rn, tmp);
+ li(tmp, imm);
+ andr(Rd, Rn, tmp);
+ }
+}
+
+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
+ ld(tmp1, adr);
+ if (src.is_register()) {
+ orr(tmp1, tmp1, src.as_register());
+ } else {
+ if (is_imm_in_range(src.as_constant(), 12, 0)) {
+ ori(tmp1, tmp1, src.as_constant());
+ } else {
+ assert_different_registers(tmp1, tmp2);
+ li(tmp2, src.as_constant());
+ orr(tmp1, tmp1, tmp2);
+ }
+ }
+ sd(tmp1, adr);
+}
+
+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) {
+ if (UseCompressedClassPointers) {
+ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+ if (CompressedKlassPointers::base() == NULL) {
+ slli(tmp, tmp, CompressedKlassPointers::shift());
+ beq(trial_klass, tmp, L);
+ return;
+ }
+ decode_klass_not_null(tmp);
+ } else {
+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+ }
+ beq(trial_klass, tmp, L);
+}
+
+// Move an oop into a register. immediate is true if we want
+// immediate instructions and nmethod entry barriers are not enabled,
+// i.e. we are not going to patch this instruction while the code is being
+// executed by another thread.
+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
+ int oop_index;
+ if (obj == NULL) {
+ oop_index = oop_recorder()->allocate_oop_index(obj);
+ } else {
+#ifdef ASSERT
+ {
+ ThreadInVMfromUnknown tiv;
+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
+ }
+#endif
+ oop_index = oop_recorder()->find_index(obj);
+ }
+ RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+ // nmethod entry barriers necessitate using the constant pool. They have to be
+ // ordered with respect to oop accesses.
+ // Using immediate literals would necessitate fence.i.
+ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) {
+ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
+ ld_constant(dst, Address(dummy, rspec));
+ } else
+ mv(dst, Address((address)obj, rspec));
+}
+
+// Move a metadata address into a register.
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+ int oop_index;
+ if (obj == NULL) {
+ oop_index = oop_recorder()->allocate_metadata_index(obj);
+ } else {
+ oop_index = oop_recorder()->find_index(obj);
+ }
+ RelocationHolder rspec = metadata_Relocation::spec(oop_index);
+ mv(dst, Address((address)obj, rspec));
+}
+
+// Writes to stack successive pages until offset reached to check for
+// stack overflow + shadow pages. This clobbers tmp.
+void MacroAssembler::bang_stack_size(Register size, Register tmp) {
+ assert_different_registers(tmp, size, t0);
+ // Bang stack for total size given plus shadow page size.
+ // Bang one page at a time because large size can bang beyond yellow and
+ // red zones.
+ mv(t0, os::vm_page_size());
+ Label loop;
+ bind(loop);
+ sub(tmp, sp, t0);
+ subw(size, size, t0);
+ sd(size, Address(tmp));
+ bgtz(size, loop);
+
+ // Bang down shadow pages too.
+ // At this point, (tmp-0) is the last address touched, so don't
+ // touch it again. (It was touched as (tmp-pagesize) but then tmp
+ // was post-decremented.) Skip this address by starting at i=1, and
+ // touch a few more pages below. N.B. It is important to touch all
+ // the way down to and including i=StackShadowPages.
+ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) {
+ // this could be any sized move but this can be a debugging crumb
+ // so the bigger the better.
+ sub(tmp, tmp, os::vm_page_size());
+ sd(size, Address(tmp, 0));
+ }
+}
+
+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
+ assert_cond(masm != NULL);
+ int32_t offset = 0;
+ _masm = masm;
+ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset);
+ _masm->lbu(t0, Address(t0, offset));
+ _masm->beqz(t0, _label);
+}
+
+SkipIfEqual::~SkipIfEqual() {
+ assert_cond(_masm != NULL);
+ _masm->bind(_label);
+ _masm = NULL;
+}
+
+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) {
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+ ld(dst, Address(xmethod, Method::const_offset()));
+ ld(dst, Address(dst, ConstMethod::constants_offset()));
+ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
+ ld(dst, Address(dst, mirror_offset));
+ resolve_oop_handle(dst, tmp);
+}
+
+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
+ // OopHandle::resolve is an indirection.
+ assert_different_registers(result, tmp);
+ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg);
+}
+
+// ((WeakHandle)result).resolve()
+void MacroAssembler::resolve_weak_handle(Register result, Register tmp) {
+ assert_different_registers(result, tmp);
+ Label resolved;
+
+ // A null weak handle resolves to null.
+ beqz(result, resolved);
+
+ // Only 64 bit platforms support GCs that require a tmp register
+ // Only IN_HEAP loads require a thread_tmp register
+ // WeakHandle::resolve is an indirection like jweak.
+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
+ result, Address(result), tmp, noreg /* tmp_thread */);
+ bind(resolved);
+}
+
+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
+ Register dst, Address src,
+ Register tmp1, Register thread_tmp) {
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ decorators = AccessInternal::decorator_fixup(decorators);
+ bool as_raw = (decorators & AS_RAW) != 0;
+ if (as_raw) {
+ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+ } else {
+ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+ }
+}
+
+void MacroAssembler::null_check(Register reg, int offset) {
+ if (needs_explicit_null_check(offset)) {
+ // provoke OS NULL exception if reg = NULL by
+ // accessing M[reg] w/o changing any registers
+ // NOTE: this is plenty to provoke a segv
+ ld(zr, Address(reg, 0));
+ } else {
+ // nothing to do, (later) access of M[reg + offset]
+ // will provoke OS NULL exception if reg = NULL
+ }
+}
+
+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
+ Address dst, Register src,
+ Register tmp1, Register thread_tmp) {
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ decorators = AccessInternal::decorator_fixup(decorators);
+ bool as_raw = (decorators & AS_RAW) != 0;
+ if (as_raw) {
+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+ } else {
+ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+ }
+}
+
+// Algorithm must match CompressedOops::encode.
+void MacroAssembler::encode_heap_oop(Register d, Register s) {
+ verify_oop(s, "broken oop in encode_heap_oop");
+ if (CompressedOops::base() == NULL) {
+ if (CompressedOops::shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
+ srli(d, s, LogMinObjAlignmentInBytes);
+ } else {
+ mv(d, s);
+ }
+ } else {
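+ // NULL encodes to 0: for a NULL oop the subtraction below goes negative
+ // (s < heap base) and the result is clamped to zero before shifting.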
+ Label notNull;
+ sub(d, s, xheapbase);
+ bgez(d, notNull);
+ mv(d, zr);
+ bind(notNull);
+ if (CompressedOops::shift() != 0) {
+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
+ srli(d, d, CompressedOops::shift());
+ }
+ }
+}
+
+void MacroAssembler::load_klass(Register dst, Register src) {
+ if (UseCompressedClassPointers) {
+ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+ decode_klass_not_null(dst);
+ } else {
+ ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+ }
+}
+
+void MacroAssembler::store_klass(Register dst, Register src) {
+ // FIXME: Should this be a store release? Concurrent GCs assume the
+ // klass length is valid if the klass field is not null.
+ if (UseCompressedClassPointers) {
+ encode_klass_not_null(src);
+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
+ } else {
+ sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
+ }
+}
+
+void MacroAssembler::store_klass_gap(Register dst, Register src) {
+ if (UseCompressedClassPointers) {
+ // Store to klass gap in destination
+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
+ }
+}
+
+void MacroAssembler::decode_klass_not_null(Register r) {
+ decode_klass_not_null(r, r);
+}
+
+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
+ assert(UseCompressedClassPointers, "should only be used for compressed headers");
+
+ if (CompressedKlassPointers::base() == NULL) {
+ if (CompressedKlassPointers::shift() != 0) {
+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
+ slli(dst, src, LogKlassAlignmentInBytes);
+ } else {
+ mv(dst, src);
+ }
+ return;
+ }
+
+ Register xbase = dst;
+ if (dst == src) {
+ xbase = tmp;
+ }
+
+ assert_different_registers(src, xbase);
+ li(xbase, (uintptr_t)CompressedKlassPointers::base());
+
+ if (CompressedKlassPointers::shift() != 0) {
+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
+ assert_different_registers(t0, xbase);
+ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
+ } else {
+ add(dst, xbase, src);
+ }
+
+ if (xbase == xheapbase) { reinit_heapbase(); }
+}
+
+void MacroAssembler::encode_klass_not_null(Register r) {
+ encode_klass_not_null(r, r);
+}
+
+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
+ assert(UseCompressedClassPointers, "should only be used for compressed headers");
+
+ if (CompressedKlassPointers::base() == NULL) {
+ if (CompressedKlassPointers::shift() != 0) {
+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
+ srli(dst, src, LogKlassAlignmentInBytes);
+ } else {
+ mv(dst, src);
+ }
+ return;
+ }
+
+ if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
+ CompressedKlassPointers::shift() == 0) {
+ zero_extend(dst, src, 32);
+ return;
+ }
+
+ Register xbase = dst;
+ if (dst == src) {
+ xbase = tmp;
+ }
+
+ assert_different_registers(src, xbase);
+ li(xbase, (intptr_t)CompressedKlassPointers::base());
+ sub(dst, src, xbase);
+ if (CompressedKlassPointers::shift() != 0) {
+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
+ srli(dst, dst, LogKlassAlignmentInBytes);
+ }
+ if (xbase == xheapbase) {
+ reinit_heapbase();
+ }
+}
+
+void MacroAssembler::decode_heap_oop_not_null(Register r) {
+ decode_heap_oop_not_null(r, r);
+}
+
+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+ assert(UseCompressedOops, "should only be used for compressed headers");
+ assert(Universe::heap() != NULL, "java heap should be initialized");
+ // Cannot assert, unverified entry point counts instructions (see .ad file)
+ // vtableStubs also counts instructions in pd_code_size_limit.
+ // Also do not verify_oop as this is called by verify_oop.
+ if (CompressedOops::shift() != 0) {
+ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
+ slli(dst, src, LogMinObjAlignmentInBytes);
+ if (CompressedOops::base() != NULL) {
+ add(dst, xheapbase, dst);
+ }
+ } else {
+ assert(CompressedOops::base() == NULL, "sanity");
+ mv(dst, src);
+ }
+}
+
+void MacroAssembler::decode_heap_oop(Register d, Register s) {
+ if (CompressedOops::base() == NULL) {
+ if (CompressedOops::shift() != 0 || d != s) {
+ slli(d, s, CompressedOops::shift());
+ }
+ } else {
+ Label done;
+ mv(d, s);
+ beqz(s, done);
+ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
+ bind(done);
+ }
+ verify_oop(d, "broken oop in decode_heap_oop");
+}
+
+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
+ Register thread_tmp, DecoratorSet decorators) {
+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
+}
+
+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
+ Register thread_tmp, DecoratorSet decorators) {
+ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
+}
+
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
+ Register thread_tmp, DecoratorSet decorators) {
+ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp);
+}
+
+// Used for storing NULLs.
+void MacroAssembler::store_heap_oop_null(Address dst) {
+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
+}
+
+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
+ bool want_remainder)
+{
+ // Full implementation of Java idiv and irem. The function
+ // returns the (pc) offset of the div instruction - may be needed
+ // for implicit exceptions.
+ //
+ // input : rs1: dividend
+ // rs2: divisor
+ //
+ // result: either
+ // quotient (= rs1 idiv rs2)
+ // remainder (= rs1 irem rs2)
+
+
+ int idivl_offset = offset();
+ if (!want_remainder) {
+ divw(result, rs1, rs2);
+ } else {
+ remw(result, rs1, rs2); // result = rs1 % rs2;
+ }
+ return idivl_offset;
+}
+
+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
+ bool want_remainder)
+{
+ // Full implementation of Java ldiv and lrem. The function
+ // returns the (pc) offset of the div instruction - may be needed
+ // for implicit exceptions.
+ //
+ // input : rs1: dividend
+ // rs2: divisor
+ //
+ // result: either
+ // quotient (= rs1 idiv rs2)
+ // remainder (= rs1 irem rs2)
+
+ int idivq_offset = offset();
+ if (!want_remainder) {
+ div(result, rs1, rs2);
+ } else {
+ rem(result, rs1, rs2); // result = rs1 % rs2;
+ }
+ return idivq_offset;
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+ Register intf_klass,
+ RegisterOrConstant itable_index,
+ Register method_result,
+ Register scan_tmp,
+ Label& L_no_such_interface,
+ bool return_method) {
+ assert_different_registers(recv_klass, intf_klass, scan_tmp);
+ assert_different_registers(method_result, intf_klass, scan_tmp);
+ assert(recv_klass != method_result || !return_method,
+ "recv_klass can be destroyed when mehtid isn't needed");
+ assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+ "caller must be same register for non-constant itable index as for method");
+
+ // Compute start of first itableOffsetEntry (which is at the end of the vtable).
+ int vtable_base = in_bytes(Klass::vtable_start_offset());
+ int itentry_off = itableMethodEntry::method_offset_in_bytes();
+ int scan_step = itableOffsetEntry::size() * wordSize;
+ int vte_size = vtableEntry::size_in_bytes();
+ assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
+
+ // %%% Could store the aligned, prescaled offset in the klassoop.
+ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
+ add(scan_tmp, scan_tmp, vtable_base);
+
+ if (return_method) {
+ // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+ if (itable_index.is_register()) {
+ slli(t0, itable_index.as_register(), 3);
+ } else {
+ li(t0, itable_index.as_constant() << 3);
+ }
+ add(recv_klass, recv_klass, t0);
+ if (itentry_off) {
+ add(recv_klass, recv_klass, itentry_off);
+ }
+ }
+
+ Label search, found_method;
+
+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
+ beq(intf_klass, method_result, found_method);
+ bind(search);
+ // Check that the previous entry is non-null. A null entry means that
+ // the receiver class doesn't implement the interface, and wasn't the
+ // same as when the caller was compiled.
+ beqz(method_result, L_no_such_interface, /* is_far */ true);
+ addi(scan_tmp, scan_tmp, scan_step);
+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
+ bne(intf_klass, method_result, search);
+
+ bind(found_method);
+
+ // Got a hit.
+ if (return_method) {
+ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
+ add(method_result, recv_klass, scan_tmp);
+ ld(method_result, Address(method_result));
+ }
+}
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+ RegisterOrConstant vtable_index,
+ Register method_result) {
+ const int base = in_bytes(Klass::vtable_start_offset());
+ assert(vtableEntry::size() * wordSize == 8,
+ "adjust the scaling in the code below");
+ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
+
+ if (vtable_index.is_register()) {
+ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
+ ld(method_result, Address(method_result, vtable_offset_in_bytes));
+ } else {
+ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
+ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
+ }
+}
+
+void MacroAssembler::membar(uint32_t order_constraint) {
+ address prev = pc() - NativeMembar::instruction_size;
+ address last = code()->last_insn();
+
+ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
+ NativeMembar *bar = NativeMembar_at(prev);
+ // We are merging two memory barrier instructions. On RISCV we
+ // can do this simply by ORing them together.
+ bar->set_kind(bar->get_kind() | order_constraint);
+ BLOCK_COMMENT("merged membar");
+ } else {
+ code()->set_last_insn(pc());
+
+ uint32_t predecessor = 0;
+ uint32_t successor = 0;
+
+ membar_mask_to_pred_succ(order_constraint, predecessor, successor);
+ fence(predecessor, successor);
+ }
+}
+
+// Form an address from base + offset in Rd. Rd may or may not
+// actually be used: you must use the Address that is returned. It
+// is up to you to ensure that the shift provided matches the size
+// of your data.
+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
+ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
+ return Address(base, byte_offset);
+ }
+
+ // Do it the hard way
+ mv(Rd, byte_offset);
+ add(Rd, base, Rd);
+ return Address(Rd);
+}
+
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+ Register super_klass,
+ Register tmp_reg,
+ Label& L_success) {
+ Label L_failure;
+ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
+ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
+ bind(L_failure);
+}
+
+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
+ ld(t0, Address(xthread, JavaThread::polling_word_offset()));
+ if (acquire) {
+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ }
+ if (at_return) {
+ bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */);
+ } else {
+ andi(t0, t0, SafepointMechanism::poll_bit());
+ bnez(t0, slow_path, true /* is_far */);
+ }
+}
+
+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
+ Label &succeed, Label *fail) {
+ // oldv holds comparison value
+ // newv holds value to write in exchange
+ // addr identifies memory word to compare against/update
+ Label retry_load, nope;
+ bind(retry_load);
+ // Load reserved from the memory location
+ lr_d(tmp, addr, Assembler::aqrl);
+ // Fail and exit if it is not what we expect
+ bne(tmp, oldv, nope);
+ // If the store conditional succeeds, tmp will be zero
+ sc_d(tmp, newv, addr, Assembler::rl);
+ beqz(tmp, succeed);
+ // Retry only when the store conditional failed
+ j(retry_load);
+
+ bind(nope);
+ membar(AnyAny);
+ mv(oldv, tmp);
+ if (fail != NULL) {
+ j(*fail);
+ }
+}
+
+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
+ Label &succeed, Label *fail) {
+ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
+ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
+}
+
+void MacroAssembler::load_reserved(Register addr,
+ enum operand_size size,
+ Assembler::Aqrl acquire) {
+ switch (size) {
+ case int64:
+ lr_d(t0, addr, acquire);
+ break;
+ case int32:
+ lr_w(t0, addr, acquire);
+ break;
+ case uint32:
+ lr_w(t0, addr, acquire);
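+ // lr.w sign-extends the loaded value into the 64-bit register; clear the upper
+ // bits so callers can compare it as an unsigned 32-bit value.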
+ zero_extend(t0, t0, 32);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void MacroAssembler::store_conditional(Register addr,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl release) {
+ switch (size) {
+ case int64:
+ sc_d(t0, new_val, addr, release);
+ break;
+ case int32:
+ case uint32:
+ sc_w(t0, new_val, addr, release);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+
+void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Register tmp1, Register tmp2, Register tmp3) {
+ assert(size == int8 || size == int16, "unsupported operand size");
+
+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
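+ // Compute the bit offset of the narrow value within its aligned 32-bit word,
+ // build a byte/halfword mask at that position, and pre-shift both expected
+ // and new_val into the same lane.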
+
+ andi(shift, addr, 3);
+ slli(shift, shift, 3);
+
+ andi(aligned_addr, addr, ~3);
+
+ if (size == int8) {
+ addi(mask, zr, 0xff);
+ } else {
+ // size == int16 case
+ addi(mask, zr, -1);
+ zero_extend(mask, mask, 16);
+ }
+ sll(mask, mask, shift);
+
+ xori(not_mask, mask, -1);
+
+ sll(expected, expected, shift);
+ andr(expected, expected, mask);
+
+ sll(new_val, new_val, shift);
+ andr(new_val, new_val, mask);
+}
+
+// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
+// which are forced to work on 4-byte aligned addresses.
+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result, bool result_as_bool,
+ Register tmp1, Register tmp2, Register tmp3) {
+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
+
+ Label retry, fail, done;
+
+ bind(retry);
+ lr_w(old, aligned_addr, acquire);
+ andr(tmp, old, mask);
+ bne(tmp, expected, fail);
+
+ andr(tmp, old, not_mask);
+ orr(tmp, tmp, new_val);
+ sc_w(tmp, tmp, aligned_addr, release);
+ bnez(tmp, retry);
+
+ if (result_as_bool) {
+ addi(result, zr, 1);
+ j(done);
+
+ bind(fail);
+ mv(result, zr);
+
+ bind(done);
+ } else {
+ andr(tmp, old, mask);
+
+ bind(fail);
+ srl(result, tmp, shift);
+
+ if (size == int8) {
+ sign_extend(result, result, 8);
+ } else {
+ // size == int16 case
+ sign_extend(result, result, 16);
+ }
+ }
+}
+
+// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, used to implement
+// weak CAS. The major difference is that it simply fails when the store conditional
+// fails.
+void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result,
+ Register tmp1, Register tmp2, Register tmp3) {
+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
+
+ Label succ, fail, done;
+
+ lr_w(old, aligned_addr, acquire);
+ andr(tmp, old, mask);
+ bne(tmp, expected, fail);
+
+ andr(tmp, old, not_mask);
+ orr(tmp, tmp, new_val);
+ sc_w(tmp, tmp, aligned_addr, release);
+ beqz(tmp, succ);
+
+ bind(fail);
+ addi(result, zr, 1);
+ j(done);
+
+ bind(succ);
+ mv(result, zr);
+
+ bind(done);
+}
+
+void MacroAssembler::cmpxchg(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result, bool result_as_bool) {
+ assert(size != int8 && size != int16, "unsupported operand size");
+
+ Label retry_load, done, ne_done;
+ bind(retry_load);
+ load_reserved(addr, size, acquire);
+ bne(t0, expected, ne_done);
+ store_conditional(addr, new_val, size, release);
+ bnez(t0, retry_load);
+
+ // equal, succeed
+ if (result_as_bool) {
+ li(result, 1);
+ } else {
+ mv(result, expected);
+ }
+ j(done);
+
+ // not equal, failed
+ bind(ne_done);
+ if (result_as_bool) {
+ mv(result, zr);
+ } else {
+ mv(result, t0);
+ }
+
+ bind(done);
+}
+
+void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result) {
+ Label fail, done, sc_done;
+ load_reserved(addr, size, acquire);
+ bne(t0, expected, fail);
+ store_conditional(addr, new_val, size, release);
+ beqz(t0, sc_done);
+
+ // fail
+ bind(fail);
+ li(result, 1);
+ j(done);
+
+ // sc_done
+ bind(sc_done);
+ mv(result, 0);
+ bind(done);
+}
+
+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \
+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
+ prev = prev->is_valid() ? prev : zr; \
+ if (incr.is_register()) { \
+ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
+ } else { \
+ mv(t0, incr.as_constant()); \
+ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
+ } \
+ return; \
+}
+
+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
+
+#undef ATOMIC_OP
+
+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \
+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
+ prev = prev->is_valid() ? prev : zr; \
+ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
+ return; \
+}
+
+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
+
+#undef ATOMIC_XCHG
+
+#define ATOMIC_XCHGU(OP1, OP2) \
+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
+ atomic_##OP2(prev, newv, addr); \
+ zero_extend(prev, prev, 32); \
+ return; \
+}
+
+ATOMIC_XCHGU(xchgwu, xchgw)
+ATOMIC_XCHGU(xchgalwu, xchgalw)
+
+#undef ATOMIC_XCHGU
+
+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
+ assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+ assert(CodeCache::find_blob(entry.target()) != NULL,
+ "destination of far call not found in code cache");
+ int32_t offset = 0;
+ if (far_branches()) {
+ // We can use auipc + jalr here because we know that the total size of
+ // the code cache cannot exceed 2Gb.
+ la_patchable(tmp, entry, offset);
+ if (cbuf != NULL) { cbuf->set_insts_mark(); }
+ jalr(x0, tmp, offset);
+ } else {
+ if (cbuf != NULL) { cbuf->set_insts_mark(); }
+ j(entry);
+ }
+}
+
+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
+ assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+ assert(CodeCache::find_blob(entry.target()) != NULL,
+ "destination of far call not found in code cache");
+ int32_t offset = 0;
+ if (far_branches()) {
+ // We can use auipc + jalr here because we know that the total size of
+ // the code cache cannot exceed 2Gb.
+ la_patchable(tmp, entry, offset);
+ if (cbuf != NULL) { cbuf->set_insts_mark(); }
+ jalr(x1, tmp, offset); // link
+ } else {
+ if (cbuf != NULL) { cbuf->set_insts_mark(); }
+ jal(entry); // link
+ }
+}
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register tmp_reg,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path,
+ Register super_check_offset) {
+ assert_different_registers(sub_klass, super_klass, tmp_reg);
+ bool must_load_sco = (super_check_offset == noreg);
+ if (must_load_sco) {
+ assert(tmp_reg != noreg, "supply either a temp or a register offset");
+ } else {
+ assert_different_registers(sub_klass, super_klass, super_check_offset);
+ }
+
+ Label L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1, "at most one NULL in batch");
+
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
+ Address super_check_offset_addr(super_klass, sco_offset);
+
+ // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label) \
+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \
+ else j(label) /*omit semi*/
+
+ // If the pointers are equal, we are done (e.g., String[] elements).
+ // This self-check enables sharing of secondary supertype arrays among
+ // non-primary types such as array-of-interface. Otherwise, each such
+ // type would need its own customized SSA.
+ // We move this check to the front of the fast path because many
+ // type checks are in fact trivially successful in this manner,
+ // so we get a nicely predicted branch right at the start of the check.
+ beq(sub_klass, super_klass, *L_success);
+
+ // Check the supertype display:
+ if (must_load_sco) {
+ lwu(tmp_reg, super_check_offset_addr);
+ super_check_offset = tmp_reg;
+ }
+ add(t0, sub_klass, super_check_offset);
+ Address super_check_addr(t0);
+ ld(t0, super_check_addr); // load displayed supertype
+
+ // This check has worked decisively for primary supers.
+ // Secondary supers are sought in the super_cache ('super_cache_addr').
+ // (Secondary supers are interfaces and very deeply nested subtypes.)
+ // This works in the same check above because of a tricky aliasing
+ // between the super_cache and the primary super display elements.
+ // (The 'super_check_addr' can address either, as the case requires.)
+ // Note that the cache is updated below if it does not help us find
+ // what we need immediately.
+ // So if it was a primary super, we can just fail immediately.
+ // Otherwise, it's the slow path for us (no success at this point).
+
+ beq(super_klass, t0, *L_success);
+ mv(t1, sc_offset);
+ if (L_failure == &L_fallthrough) {
+ beq(super_check_offset, t1, *L_slow_path);
+ } else {
+ bne(super_check_offset, t1, *L_failure, /* is_far */ true);
+ final_jmp(*L_slow_path);
+ }
+
+ bind(L_fallthrough);
+
+#undef final_jmp
+}
+
+// Scans count pointer sized words at [addr] for occurrence of value,
+// generic
+void MacroAssembler::repne_scan(Register addr, Register value, Register count,
+ Register tmp) {
+ Label Lloop, Lexit;
+ beqz(count, Lexit);
+ bind(Lloop);
+ ld(tmp, addr);
+ beq(value, tmp, Lexit);
+ add(addr, addr, wordSize);
+ sub(count, count, 1);
+ bnez(count, Lloop);
+ bind(Lexit);
+}
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+ Register super_klass,
+ Register tmp1_reg,
+ Register tmp2_reg,
+ Label* L_success,
+ Label* L_failure) {
+ assert_different_registers(sub_klass, super_klass, tmp1_reg);
+ if (tmp2_reg != noreg) {
+ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
+ }
+#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
+
+ Label L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+
+ assert(label_nulls <= 1, "at most one NULL in the batch");
+
+ // A couple of useful fields in sub_klass:
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ Address secondary_supers_addr(sub_klass, ss_offset);
+ Address super_cache_addr( sub_klass, sc_offset);
+
+ BLOCK_COMMENT("check_klass_subtype_slow_path");
+
+ // Do a linear scan of the secondary super-klass chain.
+ // This code is rarely used, so simplicity is a virtue here.
+ // The repne_scan instruction uses fixed registers, which we must spill.
+ // Don't worry too much about pre-existing connections with the input regs.
+
+ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
+ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
+
+ RegSet pushed_registers;
+ if (!IS_A_TEMP(x12)) {
+ pushed_registers += x12;
+ }
+ if (!IS_A_TEMP(x15)) {
+ pushed_registers += x15;
+ }
+
+ if (super_klass != x10 || UseCompressedOops) {
+ if (!IS_A_TEMP(x10)) {
+ pushed_registers += x10;
+ }
+ }
+
+ push_reg(pushed_registers, sp);
+
+ // Get super_klass value into x10 (even if it was in x15 or x12)
+ mv(x10, super_klass);
+
+#ifndef PRODUCT
+ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
+ Address pst_counter_addr(t1);
+ ld(t0, pst_counter_addr);
+ add(t0, t0, 1);
+ sd(t0, pst_counter_addr);
+#endif // PRODUCT
+
+ // We will consult the secondary-super array.
+ ld(x15, secondary_supers_addr);
+ // Load the array length.
+ lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
+ // Skip to start of data.
+ add(x15, x15, Array<Klass*>::base_offset_in_bytes());
+
+ // Set t0 to an obvious invalid value, falling through by default
+ li(t0, -1);
+ // Scan X12 words at [X15] for an occurrence of X10.
+ repne_scan(x15, x10, x12, t0);
+
+ // pop will restore x10, so we should use a temp register to keep its value
+ mv(t1, x10);
+
+ // Unspill the temp registers:
+ pop_reg(pushed_registers, sp);
+
+ bne(t1, t0, *L_failure);
+
+ // Success. Cache the super we found and proceed in triumph.
+ sd(super_klass, super_cache_addr);
+
+ if (L_success != &L_fallthrough) {
+ j(*L_success);
+ }
+
+#undef IS_A_TEMP
+
+ bind(L_fallthrough);
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
+void MacroAssembler::tlab_allocate(Register obj,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register tmp1,
+ Register tmp2,
+ Label& slow_case,
+ bool is_far) {
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
+}
+
+// Defines obj, preserves var_size_in_bytes
+void MacroAssembler::eden_allocate(Register obj,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register tmp,
+ Label& slow_case,
+ bool is_far) {
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far);
+}
+
+
+// get_thread() can be called anywhere inside generated code so we
+// need to save whatever non-callee save context might get clobbered
+// by the call to Thread::current() or, indeed, the call setup code.
+void MacroAssembler::get_thread(Register thread) {
+ // save all call-clobbered regs except thread
+ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
+ RegSet::range(x28, x31) + ra - thread;
+ push_reg(saved_regs, sp);
+
+ int32_t offset = 0;
+ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset);
+ jalr(ra, ra, offset);
+ if (thread != x10) {
+ mv(thread, x10);
+ }
+
+ // restore pushed registers
+ pop_reg(saved_regs, sp);
+}
+
+void MacroAssembler::load_byte_map_base(Register reg) {
+ CardTable::CardValue* byte_map_base =
+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
+ li(reg, (uint64_t)byte_map_base);
+}
+
+void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
+ relocInfo::relocType rtype = dest.rspec().reloc()->type();
+ unsigned long low_address = (uintptr_t)CodeCache::low_bound();
+ unsigned long high_address = (uintptr_t)CodeCache::high_bound();
+ unsigned long dest_address = (uintptr_t)dest.target();
+ long offset_low = dest_address - low_address;
+ long offset_high = dest_address - high_address;
+
+ assert(is_valid_riscv64_address(dest.target()), "bad address");
+ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
+
+ InstructionMark im(this);
+ code_section()->relocate(inst_mark(), dest.rspec());
+ // RISC-V doesn't compute a page-aligned address, in order to partially
+ // compensate for the use of *signed* offsets in its base+disp12
+ // addressing mode (RISC-V's PC-relative reach remains asymmetric
+ // [-(2G + 2K), 2G - 2K)).
+ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
+ int64_t distance = dest.target() - pc();
+ auipc(reg1, (int32_t)distance + 0x800);
+ offset = ((int32_t)distance << 20) >> 20;
+ } else {
+ movptr_with_offset(reg1, dest.target(), offset);
+ }
+}
+
+void MacroAssembler::build_frame(int framesize) {
+ assert(framesize >= 2, "framesize must include space for FP/RA");
+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
+ sub(sp, sp, framesize);
+ sd(fp, Address(sp, framesize - 2 * wordSize));
+ sd(ra, Address(sp, framesize - wordSize));
+ if (PreserveFramePointer) { add(fp, sp, framesize); }
+ verify_cross_modify_fence_not_required();
+}
+
+void MacroAssembler::remove_frame(int framesize) {
+ assert(framesize >= 2, "framesize must include space for FP/RA");
+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
+ ld(fp, Address(sp, framesize - 2 * wordSize));
+ ld(ra, Address(sp, framesize - wordSize));
+ add(sp, sp, framesize);
+}
+
+void MacroAssembler::reserved_stack_check() {
+ // testing if reserved zone needs to be enabled
+ Label no_reserved_zone_enabling;
+
+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
+ bltu(sp, t0, no_reserved_zone_enabling);
+
+ enter(); // RA and FP are live.
+ mv(c_rarg0, xthread);
+ int32_t offset = 0;
+ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
+ jalr(x1, t0, offset);
+ leave();
+
+ // We have already removed our own frame.
+ // throw_delayed_StackOverflowError will think that it's been
+ // called by our caller.
+ offset = 0;
+ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
+ jalr(x0, t0, offset);
+ should_not_reach_here();
+
+ bind(no_reserved_zone_enabling);
+}
+
+// Move the address of the polling page into dest.
+void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
+ ld(dest, Address(xthread, JavaThread::polling_page_offset()));
+}
+
+// Read the polling page. The address of the polling page must
+// already be in r.
+address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
+ address mark;
+ {
+ InstructionMark im(this);
+ code_section()->relocate(inst_mark(), rtype);
+ lwu(zr, Address(r, offset));
+ mark = inst_mark();
+ }
+ verify_cross_modify_fence_not_required();
+ return mark;
+}
+
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+#ifdef ASSERT
+ {
+ ThreadInVMfromUnknown tiv;
+ assert (UseCompressedOops, "should only be used for compressed oops");
+ assert (Universe::heap() != NULL, "java heap should be initialized");
+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
+ }
+#endif
+ int oop_index = oop_recorder()->find_index(obj);
+ InstructionMark im(this);
+ RelocationHolder rspec = oop_Relocation::spec(oop_index);
+ code_section()->relocate(inst_mark(), rspec);
+ li32(dst, 0xDEADBEEF);
+ zero_extend(dst, dst, 32);
+}
+
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+ assert (UseCompressedClassPointers, "should only be used for compressed headers");
+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+ int index = oop_recorder()->find_index(k);
+ assert(!Universe::heap()->is_in(k), "should not be an oop");
+
+ InstructionMark im(this);
+ RelocationHolder rspec = metadata_Relocation::spec(index);
+ code_section()->relocate(inst_mark(), rspec);
+ narrowKlass nk = CompressedKlassPointers::encode(k);
+ li32(dst, nk);
+ zero_extend(dst, dst, 32);
+}
+
+// Maybe emit a call via a trampoline. If the code cache is small
+// trampolines won't be emitted.
+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) {
+ assert(JavaThread::current()->is_Compiler_thread(), "just checking");
+ assert(entry.rspec().type() == relocInfo::runtime_call_type ||
+ entry.rspec().type() == relocInfo::opt_virtual_call_type ||
+ entry.rspec().type() == relocInfo::static_call_type ||
+ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
+
+ // We need a trampoline if branches are far.
+ if (far_branches()) {
+ bool in_scratch_emit_size = false;
+#ifdef COMPILER2
+ // We don't want to emit a trampoline if C2 is generating dummy
+ // code during its branch shortening phase.
+ CompileTask* task = ciEnv::current()->task();
+ in_scratch_emit_size =
+ (task != NULL && is_c2_compile(task->comp_level()) &&
+ Compile::current()->output()->in_scratch_emit_size());
+#endif
+ if (!in_scratch_emit_size) {
+ address stub = emit_trampoline_stub(offset(), entry.target());
+ if (stub == NULL) {
+ postcond(pc() == badAddress);
+ return NULL; // CodeCache is full
+ }
+ }
+ }
+
+ if (cbuf != NULL) { cbuf->set_insts_mark(); }
+ relocate(entry.rspec());
+ if (!far_branches()) {
+ jal(entry.target());
+ } else {
+ jal(pc());
+ }
+ // just need to return a non-null address
+ postcond(pc() != badAddress);
+ return pc();
+}
+
+address MacroAssembler::ic_call(address entry, jint method_index) {
+ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
+ movptr(t1, (address)Universe::non_oop_word());
+ assert_cond(entry != NULL);
+ return trampoline_call(Address(entry, rh));
+}
+
+// Emit a trampoline stub for a call to a target which is too far away.
+//
+// code sequences:
+//
+// call-site:
+// branch-and-link to <destination> or <trampoline stub>
+//
+// Related trampoline stub for this call site in the stub section:
+// load the call target from the constant pool
+// branch (RA still points to the call site above)
+
+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
+ address dest) {
+ address stub = start_a_stub(NativeInstruction::instruction_size
+ + NativeCallTrampolineStub::instruction_size);
+ if (stub == NULL) {
+ return NULL; // CodeBuffer::expand failed
+ }
+
+ // Create a trampoline stub relocation which relates this trampoline stub
+ // with the call instruction at insts_call_instruction_offset in the
+ // instructions code-section.
+
+ // make sure it is 4-byte aligned here, so that the destination address would be
+ // 8-byte aligned after 3 instructions
+ // when we reach here we may get a 2-byte alignment, so we need to align it
+ align(wordSize, NativeCallTrampolineStub::data_offset);
+
+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
+ insts_call_instruction_offset));
+ const int stub_start_offset = offset();
+
+ // Now, create the trampoline stub's code:
+ // - load the call
+ // - call
+ Label target;
+ ld(t0, target); // auipc + ld
+ jr(t0); // jalr
+ bind(target);
+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+ "should be");
+ assert(offset() % wordSize == 0, "bad alignment");
+ emit_int64((intptr_t)dest);
+
+ const address stub_start_addr = addr_at(stub_start_offset);
+
+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+ end_a_stub();
+ return stub_start_addr;
+}
+
+Address MacroAssembler::add_memory_helper(const Address dst) {
+ switch (dst.getMode()) {
+ case Address::base_plus_offset:
+ // This is the expected mode, although we allow all the other
+ // forms below.
+ return form_address(t1, dst.base(), dst.offset());
+ default:
+ la(t1, dst);
+ return Address(t1);
+ }
+}
+
+void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) {
+ Address adr = add_memory_helper(dst);
+ assert_different_registers(adr.base(), t0);
+ ld(t0, adr);
+ addi(t0, t0, imm);
+ sd(t0, adr);
+}
+
+void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) {
+ Address adr = add_memory_helper(dst);
+ assert_different_registers(adr.base(), t0);
+ lwu(t0, adr);
+ addiw(t0, t0, imm);
+ sw(t0, adr);
+}
+
+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
+ assert_different_registers(src1, t0);
+ int32_t offset;
+ la_patchable(t0, src2, offset);
+ ld(t0, Address(t0, offset));
+ beq(src1, t0, equal);
+}
+
+void MacroAssembler::load_method_holder_cld(Register result, Register method) {
+ load_method_holder(result, method);
+ ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
+}
+
+void MacroAssembler::load_method_holder(Register holder, Register method) {
+ ld(holder, Address(method, Method::const_offset())); // ConstMethod*
+ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
+ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
+}
+
+// string indexof
+// compute index by trailing zeros
+void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
+ Register match_mask, Register result,
+ Register ch2, Register tmp,
+ bool haystack_isL)
+{
+ int haystack_chr_shift = haystack_isL ? 0 : 1;
+ srl(match_mask, match_mask, trailing_zeros);
+ srli(match_mask, match_mask, 1);
+ srli(tmp, trailing_zeros, LogBitsPerByte);
+ if (!haystack_isL) andi(tmp, tmp, 0xE);
+ add(haystack, haystack, tmp);
+ ld(ch2, Address(haystack));
+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
+ add(result, result, tmp);
+}
+
+// string indexof
+// Find pattern element in src, compute match mask,
+// only the first occurrence of 0x80/0x8000 (counting from the least significant bit) is the valid match index
+// match mask patterns and corresponding indices would be like:
+// - 0x8080808080808080 (Latin1)
+// - 7 6 5 4 3 2 1 0 (match index)
+// - 0x8000800080008000 (UTF16)
+// - 3 2 1 0 (match index)
+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
+ Register mask1, Register mask2)
+{
+ xorr(src, pattern, src);
+ sub(match_mask, src, mask1);
+ orr(src, src, mask2);
+ notr(src, src);
+ andr(match_mask, match_mask, src);
+}
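+
+// A rough C sketch of the trick above (illustrative only). It assumes the
+// Latin1 masks mask1 = 0x0101010101010101 and mask2 = 0x7f7f7f7f7f7f7f7f,
+// which the caller is expected to set up (UTF16 uses the 16-bit analogues):
+//   uint64_t match_mask(uint64_t src, uint64_t pattern8) {
+//     uint64_t v = src ^ pattern8;               // matching bytes become 0x00
+//     return (v - 0x0101010101010101ULL) &       // borrow into bytes that were 0x00
+//            ~(v | 0x7f7f7f7f7f7f7f7fULL);       // keep only bit 7 of those bytes
+//   }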
+
+#ifdef COMPILER2
+// Code for BigInteger::mulAdd intrinsic
+// out = x10
+// in = x11
+// offset = x12 (already out.length-offset)
+// len = x13
+// k = x14
+// tmp = x28
+//
+// pseudo code from java implementation:
+// long kLong = k & LONG_MASK;
+// carry = 0;
+// offset = out.length-offset - 1;
+// for (int j = len - 1; j >= 0; j--) {
+// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
+// out[offset--] = (int)product;
+// carry = product >>> 32;
+// }
+// return (int)carry;
+void MacroAssembler::mul_add(Register out, Register in, Register offset,
+ Register len, Register k, Register tmp) {
+ Label L_tail_loop, L_unroll, L_end;
+ mv(tmp, out);
+ mv(out, zr);
+ blez(len, L_end);
+ zero_extend(k, k, 32);
+ slliw(t0, offset, LogBytesPerInt);
+ add(offset, tmp, t0);
+ slliw(t0, len, LogBytesPerInt);
+ add(in, in, t0);
+
+ const int unroll = 8;
+ li(tmp, unroll);
+ blt(len, tmp, L_tail_loop);
+ bind(L_unroll);
+ for (int i = 0; i < unroll; i++) {
+ sub(in, in, BytesPerInt);
+ lwu(t0, Address(in, 0));
+ mul(t1, t0, k);
+ add(t0, t1, out);
+ sub(offset, offset, BytesPerInt);
+ lwu(t1, Address(offset, 0));
+ add(t0, t0, t1);
+ sw(t0, Address(offset, 0));
+ srli(out, t0, 32);
+ }
+ subw(len, len, tmp);
+ bge(len, tmp, L_unroll);
+
+ bind(L_tail_loop);
+ blez(len, L_end);
+ sub(in, in, BytesPerInt);
+ lwu(t0, Address(in, 0));
+ mul(t1, t0, k);
+ add(t0, t1, out);
+ sub(offset, offset, BytesPerInt);
+ lwu(t1, Address(offset, 0));
+ add(t0, t0, t1);
+ sw(t0, Address(offset, 0));
+ srli(out, t0, 32);
+ subw(len, len, 1);
+ j(L_tail_loop);
+
+ bind(L_end);
+}
+
+// add two unsigned input and output carry
+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
+{
+ assert_different_registers(dst, carry);
+ assert_different_registers(dst, src2);
+ add(dst, src1, src2);
+ sltu(carry, dst, src2);
+}
+
+// add two input with carry
+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry)
+{
+ assert_different_registers(dst, carry);
+ add(dst, src1, src2);
+ add(dst, dst, carry);
+}
+
+// add two unsigned input with carry and output carry
+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry)
+{
+ assert_different_registers(dst, src2);
+ adc(dst, src1, src2, carry);
+ sltu(carry, dst, src2);
+}
+
+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
+ Register src1, Register src2, Register carry)
+{
+ cad(dest_lo, dest_lo, src1, carry);
+ add(dest_hi, dest_hi, carry);
+ cad(dest_lo, dest_lo, src2, carry);
+ add(final_dest_hi, dest_hi, carry);
+}
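+
+// A rough C model of the carry helpers above (illustrative only):
+//   cad : dst = src1 + src2;          carry = (dst < src2);   // carry out of the add
+//   adc : dst = src1 + src2 + carry;                          // no carry out
+//   cadc: dst = src1 + src2 + carry;  carry = (dst < src2);   // carry out of the add
+//   add2_with_carry accumulates src1 and src2 into the 128-bit pair
+//   (dest_hi:dest_lo) and writes the resulting high word to final_dest_hi
+//   (the carry register is clobbered in the process).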
+
+/**
+ * Multiply 32 bit by 32 bit first loop.
+ */
+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
+ Register y, Register y_idx, Register z,
+ Register carry, Register product,
+ Register idx, Register kdx)
+{
+ // jlong carry, x[], y[], z[];
+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+ // long product = y[idx] * x[xstart] + carry;
+ // z[kdx] = (int)product;
+ // carry = product >>> 32;
+ // }
+ // z[xstart] = (int)carry;
+
+ Label L_first_loop, L_first_loop_exit;
+ blez(idx, L_first_loop_exit);
+
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
+ lwu(x_xstart, Address(t0, 0));
+
+ bind(L_first_loop);
+ subw(idx, idx, 1);
+ shadd(t0, idx, y, t0, LogBytesPerInt);
+ lwu(y_idx, Address(t0, 0));
+ mul(product, x_xstart, y_idx);
+ add(product, product, carry);
+ srli(carry, product, 32);
+ subw(kdx, kdx, 1);
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
+ sw(product, Address(t0, 0));
+ bgtz(idx, L_first_loop);
+
+ bind(L_first_loop_exit);
+}
+
+/**
+ * Multiply 64 bit by 64 bit first loop.
+ */
+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+ Register y, Register y_idx, Register z,
+ Register carry, Register product,
+ Register idx, Register kdx)
+{
+ //
+ // jlong carry, x[], y[], z[];
+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+ // huge_128 product = y[idx] * x[xstart] + carry;
+ // z[kdx] = (jlong)product;
+ // carry = (jlong)(product >>> 64);
+ // }
+ // z[xstart] = carry;
+ //
+
+ Label L_first_loop, L_first_loop_exit;
+ Label L_one_x, L_one_y, L_multiply;
+
+ subw(xstart, xstart, 1);
+ bltz(xstart, L_one_x);
+
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
+ ld(x_xstart, Address(t0, 0));
+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
+
+ bind(L_first_loop);
+ subw(idx, idx, 1);
+ bltz(idx, L_first_loop_exit);
+ subw(idx, idx, 1);
+ bltz(idx, L_one_y);
+
+ shadd(t0, idx, y, t0, LogBytesPerInt);
+ ld(y_idx, Address(t0, 0));
+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
+ bind(L_multiply);
+
+ mulhu(t0, x_xstart, y_idx);
+ mul(product, x_xstart, y_idx);
+ cad(product, product, carry, t1);
+ adc(carry, t0, zr, t1);
+
+ subw(kdx, kdx, 2);
+ ror_imm(product, product, 32); // back to big-endian
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
+ sd(product, Address(t0, 0));
+
+ j(L_first_loop);
+
+ bind(L_one_y);
+ lwu(y_idx, Address(y, 0));
+ j(L_multiply);
+
+ bind(L_one_x);
+ lwu(x_xstart, Address(x, 0));
+ j(L_first_loop);
+
+ bind(L_first_loop_exit);
+}
+
+/**
+ * Multiply 128 bit by 128 bit. Unrolled inner loop.
+ *
+ */
+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
+ Register carry, Register carry2,
+ Register idx, Register jdx,
+ Register yz_idx1, Register yz_idx2,
+ Register tmp, Register tmp3, Register tmp4,
+ Register tmp6, Register product_hi)
+{
+ // jlong carry, x[], y[], z[];
+ // int kdx = xstart+1;
+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
+ // jlong carry2 = (jlong)(tmp3 >>> 64);
+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2;
+ // carry = (jlong)(tmp4 >>> 64);
+ // z[kdx+idx+1] = (jlong)tmp3;
+ // z[kdx+idx] = (jlong)tmp4;
+ // }
+ // idx += 2;
+ // if (idx > 0) {
+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
+ // z[kdx+idx] = (jlong)yz_idx1;
+ // carry = (jlong)(yz_idx1 >>> 64);
+ // }
+ //
+
+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+ srliw(jdx, idx, 2);
+
+ bind(L_third_loop);
+
+ subw(jdx, jdx, 1);
+ bltz(jdx, L_third_loop_exit);
+ subw(idx, idx, 4);
+
+ shadd(t0, idx, y, t0, LogBytesPerInt);
+ ld(yz_idx2, Address(t0, 0));
+ ld(yz_idx1, Address(t0, wordSize));
+
+ shadd(tmp6, idx, z, t0, LogBytesPerInt);
+
+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
+ ror_imm(yz_idx2, yz_idx2, 32);
+
+ ld(t1, Address(tmp6, 0));
+ ld(t0, Address(tmp6, wordSize));
+
+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
+ mulhu(tmp4, product_hi, yz_idx1);
+
+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
+ ror_imm(t1, t1, 32, tmp);
+
+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp
+ mulhu(carry2, product_hi, yz_idx2);
+
+ cad(tmp3, tmp3, carry, carry);
+ adc(tmp4, tmp4, zr, carry);
+ cad(tmp3, tmp3, t0, t0);
+ cadc(tmp4, tmp4, tmp, t0);
+ adc(carry, carry2, zr, t0);
+ cad(tmp4, tmp4, t1, carry2);
+ adc(carry, carry, zr, carry2);
+
+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
+ ror_imm(tmp4, tmp4, 32);
+ sd(tmp4, Address(tmp6, 0));
+ sd(tmp3, Address(tmp6, wordSize));
+
+ j(L_third_loop);
+
+ bind(L_third_loop_exit);
+
+ andi(idx, idx, 0x3);
+ beqz(idx, L_post_third_loop_done);
+
+ Label L_check_1;
+ subw(idx, idx, 2);
+ bltz(idx, L_check_1);
+
+ shadd(t0, idx, y, t0, LogBytesPerInt);
+ ld(yz_idx1, Address(t0, 0));
+ ror_imm(yz_idx1, yz_idx1, 32);
+
+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
+ mulhu(tmp4, product_hi, yz_idx1);
+
+ shadd(t0, idx, z, t0, LogBytesPerInt);
+ ld(yz_idx2, Address(t0, 0));
+ ror_imm(yz_idx2, yz_idx2, 32, tmp);
+
+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);
+
+ ror_imm(tmp3, tmp3, 32, tmp);
+ sd(tmp3, Address(t0, 0));
+
+ bind(L_check_1);
+
+ andi(idx, idx, 0x1);
+ subw(idx, idx, 1);
+ bltz(idx, L_post_third_loop_done);
+ shadd(t0, idx, y, t0, LogBytesPerInt);
+ lwu(tmp4, Address(t0, 0));
+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3
+ mulhu(carry2, tmp4, product_hi);
+
+ shadd(t0, idx, z, t0, LogBytesPerInt);
+ lwu(tmp4, Address(t0, 0));
+
+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);
+
+ shadd(t0, idx, z, t0, LogBytesPerInt);
+ sw(tmp3, Address(t0, 0));
+
+ slli(t0, carry2, 32);
+ srli(carry, tmp3, 32);
+ orr(carry, carry, t0);
+
+ bind(L_post_third_loop_done);
+}
+
+/**
+ * Code for BigInteger::multiplyToLen() intrinsic.
+ *
+ * x10: x
+ * x11: xlen
+ * x12: y
+ * x13: ylen
+ * x14: z
+ * x15: zlen
+ * x16: tmp1
+ * x17: tmp2
+ * x7: tmp3
+ * x28: tmp4
+ * x29: tmp5
+ * x30: tmp6
+ * x31: tmp7
+ */
+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
+ Register z, Register zlen,
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+ Register tmp5, Register tmp6, Register product_hi)
+{
+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
+
+ const Register idx = tmp1;
+ const Register kdx = tmp2;
+ const Register xstart = tmp3;
+
+ const Register y_idx = tmp4;
+ const Register carry = tmp5;
+ const Register product = xlen;
+ const Register x_xstart = zlen; // reuse register
+
+ mv(idx, ylen); // idx = ylen;
+ mv(kdx, zlen); // kdx = xlen+ylen;
+ mv(carry, zr); // carry = 0;
+
+ Label L_multiply_64_x_64_loop, L_done;
+
+ subw(xstart, xlen, 1);
+ bltz(xstart, L_done);
+
+ const Register jdx = tmp1;
+
+ if (AvoidUnalignedAccesses) {
+ // Check whether xlen and ylen are both even; otherwise the 8-byte loads from x and y would be unaligned.
+ orr(t0, xlen, ylen);
+ andi(t0, t0, 0x1);
+ beqz(t0, L_multiply_64_x_64_loop);
+
+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
+ sw(carry, Address(t0, 0));
+
+ Label L_second_loop_unaligned;
+ bind(L_second_loop_unaligned);
+ mv(carry, zr);
+ mv(jdx, ylen);
+ subw(xstart, xstart, 1);
+ bltz(xstart, L_done);
+ sub(sp, sp, 2 * wordSize);
+ sd(z, Address(sp, 0));
+ sd(zr, Address(sp, wordSize));
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
+ addi(z, t0, 4);
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
+ lwu(product, Address(t0, 0));
+ Label L_third_loop, L_third_loop_exit;
+
+ blez(jdx, L_third_loop_exit);
+
+ bind(L_third_loop);
+ subw(jdx, jdx, 1);
+ shadd(t0, jdx, y, t0, LogBytesPerInt);
+ lwu(t0, Address(t0, 0));
+ mul(t1, t0, product);
+ add(t0, t1, carry);
+ shadd(tmp6, jdx, z, t1, LogBytesPerInt);
+ lwu(t1, Address(tmp6, 0));
+ add(t0, t0, t1);
+ sw(t0, Address(tmp6, 0));
+ srli(carry, t0, 32);
+ bgtz(jdx, L_third_loop);
+
+ bind(L_third_loop_exit);
+ ld(z, Address(sp, 0));
+ addi(sp, sp, 2 * wordSize);
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
+ sw(carry, Address(t0, 0));
+
+ j(L_second_loop_unaligned);
+ }
+
+ bind(L_multiply_64_x_64_loop);
+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+
+ Label L_second_loop_aligned;
+ beqz(kdx, L_second_loop_aligned);
+
+ Label L_carry;
+ subw(kdx, kdx, 1);
+ beqz(kdx, L_carry);
+
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
+ sw(carry, Address(t0, 0));
+ srli(carry, carry, 32);
+ subw(kdx, kdx, 1);
+
+ bind(L_carry);
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
+ sw(carry, Address(t0, 0));
+
+ // Second and third (nested) loops.
+ //
+ // for (int i = xstart-1; i >= 0; i--) { // Second loop
+ // carry = 0;
+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
+ // (z[k] & LONG_MASK) + carry;
+ // z[k] = (int)product;
+ // carry = product >>> 32;
+ // }
+ // z[i] = (int)carry;
+ // }
+ //
+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
+
+ bind(L_second_loop_aligned);
+ mv(carry, zr); // carry = 0;
+ mv(jdx, ylen); // j = ystart+1
+
+ subw(xstart, xstart, 1); // i = xstart-1;
+ bltz(xstart, L_done);
+
+ sub(sp, sp, 4 * wordSize);
+ sd(z, Address(sp, 0));
+
+ Label L_last_x;
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
+ addi(z, t0, 4);
+ subw(xstart, xstart, 1); // i = xstart-1;
+ bltz(xstart, L_last_x);
+
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
+ ld(product_hi, Address(t0, 0));
+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
+
+ Label L_third_loop_prologue;
+ bind(L_third_loop_prologue);
+
+ sd(ylen, Address(sp, wordSize));
+ sd(x, Address(sp, 2 * wordSize));
+ sd(xstart, Address(sp, 3 * wordSize));
+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
+ ld(z, Address(sp, 0));
+ ld(ylen, Address(sp, wordSize));
+ ld(x, Address(sp, 2 * wordSize));
+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
+ addi(sp, sp, 4 * wordSize);
+
+ addiw(tmp3, xlen, 1);
+ shadd(t0, tmp3, z, t0, LogBytesPerInt);
+ sw(carry, Address(t0, 0));
+
+ subw(tmp3, tmp3, 1);
+ bltz(tmp3, L_done);
+
+ srli(carry, carry, 32);
+ shadd(t0, tmp3, z, t0, LogBytesPerInt);
+ sw(carry, Address(t0, 0));
+ j(L_second_loop_aligned);
+
+ // Next infrequent code is moved outside loops.
+ bind(L_last_x);
+ lwu(product_hi, Address(x, 0));
+ j(L_third_loop_prologue);
+
+ bind(L_done);
+}
+#endif
+
+// Count the number of bits occupied by trailing zero chars, scanning from lsb to msb
+// until the first non-zero element. In the LL case each element is one byte, so we
+// step 8 bits at a time; otherwise each element is two bytes and we step 16 bits.
+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2)
+{
+ if (UseZbb) {
+ assert_different_registers(Rd, Rs, tmp1);
+ int step = isLL ? 8 : 16;
+ ctz(Rd, Rs);
+ andi(tmp1, Rd, step - 1);
+ sub(Rd, Rd, tmp1);
+ return;
+ }
+ assert_different_registers(Rd, Rs, tmp1, tmp2);
+ Label Loop;
+ int step = isLL ? 8 : 16;
+ li(Rd, -step);
+ mv(tmp2, Rs);
+
+ bind(Loop);
+ addi(Rd, Rd, step);
+ andi(tmp1, tmp2, ((1 << step) - 1));
+ srli(tmp2, tmp2, step);
+ beqz(tmp1, Loop);
+}
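+
+// Roughly equivalent C for the non-Zbb fallback (illustrative only; like the
+// assembly, it assumes Rs is non-zero, otherwise the loop would not terminate):
+//   int bits = -step;                        // step is 8 (LL) or 16 (UU/UL)
+//   unsigned low;
+//   do {
+//     bits += step;
+//     low = x & ((1u << step) - 1);
+//     x >>= step;
+//   } while (low == 0);
+//   // Rd = bits, the number of bits spanned by the trailing all-zero elements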
+
+// This method reads the 4 adjacent bytes in the lower half of the source register
+// and inflates them into the destination register, for example:
+// Rs: A7A6A5A4A3A2A1A0
+// Rd: 00A300A200A100A0
+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2)
+{
+ assert_different_registers(Rd, Rs, tmp1, tmp2);
+ li(tmp1, 0xFF);
+ mv(Rd, zr);
+ for (int i = 0; i <= 3; i++)
+ {
+ andr(tmp2, Rs, tmp1);
+ if (i) {
+ slli(tmp2, tmp2, i * 8);
+ }
+ orr(Rd, Rd, tmp2);
+ if (i != 3) {
+ slli(tmp1, tmp1, 8);
+ }
+ }
+}
+
+// This method reads the 4 adjacent bytes in the upper half of the source register
+// and inflates them into the destination register, for example:
+// Rs: A7A6A5A4A3A2A1A0
+// Rd: 00A700A600A500A4
+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2)
+{
+ assert_different_registers(Rd, Rs, tmp1, tmp2);
+ li(tmp1, 0xFF00000000);
+ mv(Rd, zr);
+ for (int i = 0; i <= 3; i++)
+ {
+ andr(tmp2, Rs, tmp1);
+ orr(Rd, Rd, tmp2);
+ srli(Rd, Rd, 8);
+ if (i != 3) {
+ slli(tmp1, tmp1, 8);
+ }
+ }
+}
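+
+// Roughly equivalent C for the two inflate helpers (illustrative only):
+//   uint64_t inflate_lo32(uint64_t rs) {   // 0x....A3A2A1A0 -> 0x00A300A200A100A0
+//     uint64_t rd = 0;
+//     for (int i = 0; i < 4; i++)
+//       rd |= ((rs >> (i * 8)) & 0xFF) << (i * 16);
+//     return rd;
+//   }
+//   uint64_t inflate_hi32(uint64_t rs) {   // 0xA7A6A5A4.... -> 0x00A700A600A500A4
+//     uint64_t rd = 0;
+//     for (int i = 0; i < 4; i++)
+//       rd |= ((rs >> (32 + i * 8)) & 0xFF) << (i * 16);
+//     return rd;
+//   }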
+
+// The size of the blocks erased by the zero_blocks stub. We must
+// handle anything smaller than this ourselves in zero_words().
+const int MacroAssembler::zero_words_block_size = 8;
+
+// zero_words() is used by C2 ClearArray patterns. It is as small as
+// possible, handling small word counts locally and delegating
+// anything larger to the zero_blocks stub. It is expanded many times
+// in compiled code, so it is important to keep it short.
+
+// ptr: Address of a buffer to be zeroed.
+// cnt: Count in HeapWords.
+//
+// ptr, cnt, and t0 are clobbered.
+address MacroAssembler::zero_words(Register ptr, Register cnt)
+{
+ assert(is_power_of_2(zero_words_block_size), "adjust this");
+ assert(ptr == x28 && cnt == x29, "mismatch in register usage");
+ assert_different_registers(cnt, t0);
+
+ BLOCK_COMMENT("zero_words {");
+ mv(t0, zero_words_block_size);
+ Label around, done, done16;
+ bltu(cnt, t0, around);
+ {
+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
+ if (StubRoutines::riscv::complete()) {
+ address tpc = trampoline_call(zero_blocks);
+ if (tpc == NULL) {
+ DEBUG_ONLY(reset_labels(around));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
+ } else {
+ jal(zero_blocks);
+ }
+ }
+ bind(around);
+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
+ Label l;
+ andi(t0, cnt, i);
+ beqz(t0, l);
+ for (int j = 0; j < i; j++) {
+ sd(zr, Address(ptr, 0));
+ addi(ptr, ptr, 8);
+ }
+ bind(l);
+ }
+ {
+ Label l;
+ andi(t0, cnt, 1);
+ beqz(t0, l);
+ sd(zr, Address(ptr, 0));
+ bind(l);
+ }
+ BLOCK_COMMENT("} zero_words");
+ postcond(pc() != badAddress);
+ return pc();
+}
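+
+// The tail above clears the residual count with a binary decomposition
+// (a sketch, assuming the zero_blocks stub leaves the remaining count < 8 in cnt):
+//   if (cnt & 4) { store 4 zero words; }
+//   if (cnt & 2) { store 2 zero words; }
+//   if (cnt & 1) { store 1 zero word;  }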
+
+#define SmallArraySize (18 * BytesPerLong)
+
+// base: Address of a buffer to be zeroed, 8 bytes aligned.
+// cnt: Immediate count in HeapWords.
+void MacroAssembler::zero_words(Register base, u_int64_t cnt)
+{
+ assert_different_registers(base, t0, t1);
+
+ BLOCK_COMMENT("zero_words {");
+
+ if (cnt <= SmallArraySize / BytesPerLong) {
+ for (int i = 0; i < (int)cnt; i++) {
+ sd(zr, Address(base, i * wordSize));
+ }
+ } else {
+ const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
+ int remainder = cnt % unroll;
+ for (int i = 0; i < remainder; i++) {
+ sd(zr, Address(base, i * wordSize));
+ }
+
+ Label loop;
+ Register cnt_reg = t0;
+ Register loop_base = t1;
+ cnt = cnt - remainder;
+ li(cnt_reg, cnt);
+ add(loop_base, base, remainder * wordSize);
+ bind(loop);
+ sub(cnt_reg, cnt_reg, unroll);
+ for (int i = 0; i < unroll; i++) {
+ sd(zr, Address(loop_base, i * wordSize));
+ }
+ add(loop_base, loop_base, unroll * wordSize);
+ bnez(cnt_reg, loop);
+ }
+
+ BLOCK_COMMENT("} zero_words");
+}
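+
+// For example (illustrative): with cnt == 19 and unroll == 8, the first
+// 19 % 8 == 3 words are zeroed individually, and the unrolled loop then clears
+// the remaining 16 words in two iterations of 8 sd(zr) stores each.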
+
+// base: Address of a buffer to be filled, 8 bytes aligned.
+// cnt: Count in 8-byte units.
+// value: Value to be filled with.
+// base will point to the end of the buffer after filling.
+void MacroAssembler::fill_words(Register base, Register cnt, Register value)
+{
+// Algorithm:
+//
+// t0 = cnt & 7
+// cnt -= t0
+// p += t0
+// switch (t0):
+// switch start:
+// do while cnt
+// cnt -= 8
+// p[-8] = value
+// case 7:
+// p[-7] = value
+// case 6:
+// p[-6] = value
+// // ...
+// case 1:
+// p[-1] = value
+// case 0:
+// p += 8
+// do-while end
+// switch end
+
+ assert_different_registers(base, cnt, value, t0, t1);
+
+ Label fini, skip, entry, loop;
+ const int unroll = 8; // Number of sd instructions we'll unroll
+
+ beqz(cnt, fini);
+
+ andi(t0, cnt, unroll - 1);
+ sub(cnt, cnt, t0);
+ // Handle the first cnt % 8 stores by jumping into the unrolled body below; after that each loop iteration stores 8 words.
+ shadd(base, t0, base, t1, 3);
+ la(t1, entry);
+ slli(t0, t0, 2); // (cnt % 8) * 4: each sd instruction is 4 bytes, so back up that many bytes from 'entry'
+ sub(t1, t1, t0);
+ jr(t1);
+
+ bind(loop);
+ add(base, base, unroll * 8);
+ for (int i = -unroll; i < 0; i++) {
+ sd(value, Address(base, i * 8));
+ }
+ bind(entry);
+ sub(cnt, cnt, unroll);
+ bgez(cnt, loop);
+
+ bind(fini);
+}
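+
+// Illustrative example: if cnt % 8 == 3, t1 is moved back 3 * 4 bytes from
+// 'entry', so exactly the last three sd instructions of the unrolled body run
+// before the first loop test; every later iteration then stores 8 words.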
+
+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \
+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
+ Label L_Okay; \
+ fscsr(zr); \
+ FLOATCVT(dst, src); \
+ frcsr(tmp); \
+ andi(tmp, tmp, 0x1E); \
+ beqz(tmp, L_Okay); \
+ FLOATEQ(tmp, src, src); \
+ bnez(tmp, L_Okay); \
+ mv(dst, zr); \
+ bind(L_Okay); \
+}
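+
+// Example expansion (illustrative): FCVT_SAFE(fcvt_w_s, feq_s) defines
+// fcvt_w_s_safe(dst, src, tmp), which clears the FP status register, converts
+// float -> int32, and re-reads the accrued exception flags; if a flag was
+// raised and src is NaN (feq_s(src, src) yields 0), dst is forced to 0, while
+// +/-Inf keep the saturated hardware result.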
+
+FCVT_SAFE(fcvt_w_s, feq_s)
+FCVT_SAFE(fcvt_l_s, feq_s)
+FCVT_SAFE(fcvt_w_d, feq_d)
+FCVT_SAFE(fcvt_l_d, feq_d)
+
+#undef FCVT_SAFE
+
+#define FCMP(FLOATTYPE, FLOATSIG) \
+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \
+ FloatRegister Rs2, int unordered_result) { \
+ Label Ldone; \
+ if (unordered_result < 0) { \
+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \
+ /* installs 1 if gt else 0 */ \
+ flt_##FLOATSIG(result, Rs2, Rs1); \
+ /* Rs1 > Rs2, install 1 */ \
+ bgtz(result, Ldone); \
+ feq_##FLOATSIG(result, Rs1, Rs2); \
+ addi(result, result, -1); \
+ /* Rs1 = Rs2, install 0 */ \
+ /* NaN or Rs1 < Rs2, install -1 */ \
+ bind(Ldone); \
+ } else { \
+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \
+ /* installs 1 if lt else 0 (unordered gives 0) */ \
+ flt_##FLOATSIG(result, Rs1, Rs2); \
+ /* Rs1 < Rs2, install -1 */ \
+ bgtz(result, Ldone); \
+ feq_##FLOATSIG(result, Rs1, Rs2); \
+ addi(result, result, -1); \
+ /* Rs1 = Rs2, install 0 */ \
+ /* NaN or Rs1 > Rs2, install 1 */ \
+ bind(Ldone); \
+ neg(result, result); \
+ } \
+}
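+
+// FCMP(float, s) and FCMP(double, d) below define float_compare() and
+// double_compare(), which materialize -1/0/1 in 'result', mapping the
+// unordered (NaN) case to -1 or +1 according to 'unordered_result'.
+// Illustrative usage:
+//   float_compare(x10, f0, f1, -1);   // x10 = -1 when either input is NaN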
+
+FCMP(float, s);
+FCMP(double, d);
+
+#undef FCMP
+
+// Zero words; len is in bytes
+// Destroys all registers except addr
+// len must be a nonzero multiple of wordSize
+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
+ assert_different_registers(addr, len, tmp, t0, t1);
+
+#ifdef ASSERT
+ {
+ Label L;
+ andi(t0, len, BytesPerWord - 1);
+ beqz(t0, L);
+ stop("len is not a multiple of BytesPerWord");
+ bind(L);
+ }
+#endif // ASSERT
+
+#ifndef PRODUCT
+ block_comment("zero memory");
+#endif // PRODUCT
+
+ Label loop;
+ Label entry;
+
+ // Algorithm:
+ //
+ // t0 = cnt & 7
+ // cnt -= t0
+ // p += t0
+ // switch (t0) {
+ // do {
+ // cnt -= 8
+ // p[-8] = 0
+ // case 7:
+ // p[-7] = 0
+ // case 6:
+ // p[-6] = 0
+ // ...
+ // case 1:
+ // p[-1] = 0
+ // case 0:
+ // p += 8
+ // } while (cnt)
+ // }
+
+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll
+
+ srli(len, len, LogBytesPerWord);
+ andi(t0, len, unroll - 1); // t0 = cnt % unroll
+ sub(len, len, t0); // cnt -= unroll
+ // tmp always points to the end of the region we're about to zero
+ shadd(tmp, t0, addr, t1, LogBytesPerWord);
+ la(t1, entry);
+ slli(t0, t0, 2);
+ sub(t1, t1, t0);
+ jr(t1);
+ bind(loop);
+ sub(len, len, unroll);
+ for (int i = -unroll; i < 0; i++) {
+ Assembler::sd(zr, Address(tmp, i * wordSize));
+ }
+ bind(entry);
+ add(tmp, tmp, unroll * wordSize);
+ bnez(len, loop);
+}
+
+// shift left by shamt and add
+// Rd = (Rs1 << shamt) + Rs2
+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
+ if (UseZba) {
+ if (shamt == 1) {
+ sh1add(Rd, Rs1, Rs2);
+ return;
+ } else if (shamt == 2) {
+ sh2add(Rd, Rs1, Rs2);
+ return;
+ } else if (shamt == 3) {
+ sh3add(Rd, Rs1, Rs2);
+ return;
+ }
+ }
+
+ if (shamt != 0) {
+ slli(tmp, Rs1, shamt);
+ add(Rd, Rs2, tmp);
+ } else {
+ add(Rd, Rs1, Rs2);
+ }
+}
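+
+// Illustrative usage: shadd(t0, idx, base, t1, LogBytesPerInt) computes
+// t0 = base + (idx << 2), i.e. the address of base[idx] for an int array;
+// with Zba this collapses to a single sh2add instruction.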
+
+void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
+ if (UseZba && bits == 32) {
+ zext_w(dst, src);
+ return;
+ }
+
+ if (UseZbb && bits == 16) {
+ zext_h(dst, src);
+ return;
+ }
+
+ if (bits == 8) {
+ zext_b(dst, src);
+ } else {
+ slli(dst, src, XLEN - bits);
+ srli(dst, dst, XLEN - bits);
+ }
+}
+
+void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
+ if (UseZbb) {
+ if (bits == 8) {
+ sext_b(dst, src);
+ return;
+ } else if (bits == 16) {
+ sext_h(dst, src);
+ return;
+ }
+ }
+
+ if (bits == 32) {
+ sext_w(dst, src);
+ } else {
+ slli(dst, src, XLEN - bits);
+ srai(dst, dst, XLEN - bits);
+ }
+}
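+
+// Illustrative expansions without Zba/Zbb:
+//   zero_extend(dst, src, 16)  ->  slli dst, src, 48; srli dst, dst, 48
+//   sign_extend(dst, src, 8)   ->  slli dst, src, 56; srai dst, dst, 56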
+
+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
+{
+ if (src1 == src2) {
+ mv(dst, zr);
+ return;
+ }
+ Label done;
+ Register left = src1;
+ Register right = src2;
+ if (dst == src1) {
+ assert_different_registers(dst, src2, tmp);
+ mv(tmp, src1);
+ left = tmp;
+ } else if (dst == src2) {
+ assert_different_registers(dst, src1, tmp);
+ mv(tmp, src2);
+ right = tmp;
+ }
+
+ // installs 1 if gt else 0
+ slt(dst, right, left);
+ bnez(dst, done);
+ slt(dst, left, right);
+ // dst = -1 if lt; dst = 0 if eq
+ neg(dst, dst);
+ bind(done);
+}
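+
+// A C model of the comparison above (illustrative only):
+//   int cmp_l2i(int64_t a, int64_t b) {
+//     return (a > b) ? 1 : ((a < b) ? -1 : 0);
+//   }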
+
+void MacroAssembler::safepoint_ifence() {
+ ifence();
+#ifndef PRODUCT
+ if (VerifyCrossModifyFence) {
+ // Clear the thread state.
+ sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
+ }
+#endif
+}
+
+#ifndef PRODUCT
+void MacroAssembler::verify_cross_modify_fence_not_required() {
+ if (VerifyCrossModifyFence) {
+ // Check if thread needs a cross modify fence.
+ lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
+ Label fence_not_required;
+ beqz(t0, fence_not_required);
+ // If it does then fail.
+ la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure)));
+ mv(c_rarg0, xthread);
+ jalr(t0);
+ bind(fence_not_required);
+ }
+}
+#endif
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..23e09475be1be7430c136ee8bf4cbab050dda213
--- /dev/null
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -0,0 +1,858 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
+
+#include "asm/assembler.hpp"
+#include "metaprogramming/enableIf.hpp"
+#include "oops/compressedOops.hpp"
+#include "utilities/powerOfTwo.hpp"
+
+// MacroAssembler extends Assembler by frequently used macros.
+//
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+class MacroAssembler: public Assembler {
+
+ public:
+ MacroAssembler(CodeBuffer* code) : Assembler(code) {
+ }
+ virtual ~MacroAssembler() {}
+
+ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
+
+ // Place a fence.i after code may have been modified due to a safepoint.
+ void safepoint_ifence();
+
+ // Alignment
+ void align(int modulus, int extra_offset = 0);
+
+ // Stack frame creation/removal
+ // Note that SP must be updated to the right place before saving/restoring RA and FP
+ // because signal based thread suspend/resume could happen asynchronously.
+ void enter() {
+ addi(sp, sp, - 2 * wordSize);
+ sd(ra, Address(sp, wordSize));
+ sd(fp, Address(sp));
+ addi(fp, sp, 2 * wordSize);
+ }
+
+ void leave() {
+ addi(sp, fp, - 2 * wordSize);
+ ld(fp, Address(sp));
+ ld(ra, Address(sp, wordSize));
+ addi(sp, sp, 2 * wordSize);
+ }
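+
+ // After enter() the frame looks like this (a sketch; offsets in bytes):
+ //   fp       == caller's sp on entry
+ //   fp -  8  -> saved ra
+ //   fp - 16  -> saved fp     (sp == fp - 16)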
+
+
+ // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
+ // The pointer will be loaded into the thread register.
+ void get_thread(Register thread);
+
+ // Support for VM calls
+ //
+ // It is imperative that all calls into the VM are handled via the call_VM macros.
+ // They make sure that the stack linkage is setup correctly. call_VM's correspond
+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+
+ void call_VM(Register oop_result,
+ address entry_point,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1, Register arg_2,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1, Register arg_2, Register arg_3,
+ bool check_exceptions = true);
+
+ // Overloadings with last_Java_sp
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments = 0,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1, Register arg_2,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1, Register arg_2, Register arg_3,
+ bool check_exceptions = true);
+
+ void get_vm_result(Register oop_result, Register java_thread);
+ void get_vm_result_2(Register metadata_result, Register java_thread);
+
+ // These always tightly bind to MacroAssembler::call_VM_leaf_base
+ // bypassing the virtual implementation
+ void call_VM_leaf(address entry_point,
+ int number_of_arguments = 0);
+ void call_VM_leaf(address entry_point,
+ Register arg_0);
+ void call_VM_leaf(address entry_point,
+ Register arg_0, Register arg_1);
+ void call_VM_leaf(address entry_point,
+ Register arg_0, Register arg_1, Register arg_2);
+
+ // These always tightly bind to MacroAssembler::call_VM_base
+ // bypassing the virtual implementation
+ void super_call_VM_leaf(address entry_point, Register arg_0);
+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
+
+ // last Java Frame (fills frame anchor)
+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);
+
+ // thread in the default location (xthread)
+ void reset_last_Java_frame(bool clear_fp);
+
+ void call_native(address entry_point,
+ Register arg_0);
+ void call_native_base(
+ address entry_point, // the entry point
+ Label* retaddr = NULL
+ );
+
+ virtual void call_VM_leaf_base(
+ address entry_point, // the entry point
+ int number_of_arguments, // the number of arguments to pop after the call
+ Label* retaddr = NULL
+ );
+
+ virtual void call_VM_leaf_base(
+ address entry_point, // the entry point
+ int number_of_arguments, // the number of arguments to pop after the call
+ Label& retaddr) {
+ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
+ }
+
+ virtual void call_VM_base( // returns the register containing the thread upon return
+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise
+ Register java_thread, // the thread if computed before ; use noreg otherwise
+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
+ address entry_point, // the entry point
+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
+ bool check_exceptions // whether to check for pending exceptions after return
+ );
+
+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
+
+ virtual void check_and_handle_earlyret(Register java_thread);
+ virtual void check_and_handle_popframe(Register java_thread);
+
+ void resolve_weak_handle(Register result, Register tmp);
+ void resolve_oop_handle(Register result, Register tmp = x15);
+ void resolve_jobject(Register value, Register thread, Register tmp);
+
+ void movoop(Register dst, jobject obj, bool immediate = false);
+ void mov_metadata(Register dst, Metadata* obj);
+ void bang_stack_size(Register size, Register tmp);
+ void set_narrow_oop(Register dst, jobject obj);
+ void set_narrow_klass(Register dst, Klass* k);
+
+ void load_mirror(Register dst, Register method, Register tmp = x15);
+ void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
+ Address src, Register tmp1, Register thread_tmp);
+ void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
+ Register src, Register tmp1, Register thread_tmp);
+ void load_klass(Register dst, Register src);
+ void store_klass(Register dst, Register src);
+ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);
+
+ void encode_klass_not_null(Register r);
+ void decode_klass_not_null(Register r);
+ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
+ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
+ void decode_heap_oop_not_null(Register r);
+ void decode_heap_oop_not_null(Register dst, Register src);
+ void decode_heap_oop(Register d, Register s);
+ void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
+ void encode_heap_oop(Register d, Register s);
+ void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
+ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
+ Register thread_tmp = noreg, DecoratorSet decorators = 0);
+ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
+ Register thread_tmp = noreg, DecoratorSet decorators = 0);
+ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
+ Register thread_tmp = noreg, DecoratorSet decorators = 0);
+
+ void store_klass_gap(Register dst, Register src);
+
+ // currently unimplemented
+ // Used for storing NULL. All other oop constants should be
+ // stored using routines that take a jobject.
+ void store_heap_oop_null(Address dst);
+
+ // This dummy is to prevent a call to store_heap_oop from
+ // converting a zero (like NULL) into a Register by giving
+ // the compiler two choices it can't resolve
+
+ void store_heap_oop(Address dst, void* dummy);
+
+ // Support for NULL-checks
+ //
+ // Generates code that causes a NULL OS exception if the content of reg is NULL.
+ // If the accessed location is M[reg + offset] and the offset is known, provide the
+ // offset. No explicit code generation is needed if the offset is within a certain
+ // range (0 <= offset <= page_size).
+
+ virtual void null_check(Register reg, int offset = -1);
+ static bool needs_explicit_null_check(intptr_t offset);
+ static bool uses_implicit_null_check(void* address);
+
+ // idiv variant which deals with MINLONG as dividend and -1 as divisor
+ int corrected_idivl(Register result, Register rs1, Register rs2,
+ bool want_remainder);
+ int corrected_idivq(Register result, Register rs1, Register rs2,
+ bool want_remainder);
+
+ // interface method calling
+ void lookup_interface_method(Register recv_klass,
+ Register intf_klass,
+ RegisterOrConstant itable_index,
+ Register method_result,
+ Register scan_tmp,
+ Label& no_such_interface,
+ bool return_method = true);
+
+ // virtual method calling
+ // n.b. x86 allows RegisterOrConstant for vtable_index
+ void lookup_virtual_method(Register recv_klass,
+ RegisterOrConstant vtable_index,
+ Register method_result);
+
+ // Form an address from base + offset in Rd. Rd may or may not
+ // actually be used: you must use the Address that is returned. It
+ // is up to you to ensure that the shift provided matches the size
+ // of your data.
+ Address form_address(Register Rd, Register base, long byte_offset);
+
+ // allocation
+ void tlab_allocate(
+ Register obj, // result: pointer to object after successful allocation
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+ int con_size_in_bytes, // object size in bytes if known at compile time
+ Register tmp1, // temp register
+ Register tmp2, // temp register
+ Label& slow_case, // continuation point if fast allocation fails
+ bool is_far = false
+ );
+
+ void eden_allocate(
+ Register obj, // result: pointer to object after successful allocation
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+ int con_size_in_bytes, // object size in bytes if known at compile time
+ Register tmp, // temp register
+ Label& slow_case, // continuation point if fast allocation fails
+ bool is_far = false
+ );
+
+ // Test sub_klass against super_klass, with fast and slow paths.
+
+ // The fast path produces a tri-state answer: yes / no / maybe-slow.
+ // One of the three labels can be NULL, meaning take the fall-through.
+ // If super_check_offset is -1, the value is loaded up from super_klass.
+ // No registers are killed, except tmp_reg
+ void check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register tmp_reg,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path,
+ Register super_check_offset = noreg);
+
+ // The rest of the type check; must be wired to a corresponding fast path.
+ // It does not repeat the fast path logic, so don't use it standalone.
+ // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
+ // Updates the sub's secondary super cache as necessary.
+ void check_klass_subtype_slow_path(Register sub_klass,
+ Register super_klass,
+ Register tmp1_reg,
+ Register tmp2_reg,
+ Label* L_success,
+ Label* L_failure);
+
+ void check_klass_subtype(Register sub_klass,
+ Register super_klass,
+ Register tmp_reg,
+ Label& L_success);
+
+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+
+ // only if +VerifyOops
+ void verify_oop(Register reg, const char* s = "broken oop");
+ void verify_oop_addr(Address addr, const char* s = "broken oop addr");
+
+ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
+ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
+
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+ // A more convenient access to fence for our purposes
+ // We use four bits to encode the read and write bits of the predecessor and successor sets,
+ // and extend r to ir and w to ow when UseConservativeFence is enabled.
+ enum Membar_mask_bits {
+ StoreStore = 0b0101, // (pred = ow + succ = ow)
+ LoadStore = 0b1001, // (pred = ir + succ = ow)
+ StoreLoad = 0b0110, // (pred = ow + succ = ir)
+ LoadLoad = 0b1010, // (pred = ir + succ = ir)
+ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
+ };
+
+ void membar(uint32_t order_constraint);
+
+ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
+ predecessor = (order_constraint >> 2) & 0x3;
+ successor = order_constraint & 0x3;
+
+ // extend rw -> iorw:
+ // 01(w) -> 0101(ow)
+ // 10(r) -> 1010(ir)
+ // 11(rw)-> 1111(iorw)
+ if (UseConservativeFence) {
+ predecessor |= predecessor << 2;
+ successor |= successor << 2;
+ }
+ }
+
+ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
+ return ((predecessor & 0x3) << 2) | (successor & 0x3);
+ }
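+
+ // For example (illustrative): membar(StoreLoad) yields predecessor 0b01 (w)
+ // and successor 0b10 (r); with UseConservativeFence these are extended to
+ // 0b0101 (ow) and 0b1010 (ir), i.e. a "fence ow, ir" instruction.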
+
+ // prints msg, dumps registers and stops execution
+ void stop(const char* msg);
+
+ static void debug64(char* msg, int64_t pc, int64_t regs[]);
+
+ void unimplemented(const char* what = "");
+
+ void should_not_reach_here() { stop("should not reach here"); }
+
+ static address target_addr_for_insn(address insn_addr);
+
+ // Required platform-specific helpers for Label::patch_instructions.
+ // They _shadow_ the declarations in AbstractAssembler, which are undefined.
+ static int pd_patch_instruction_size(address branch, address target);
+ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
+ pd_patch_instruction_size(branch, target);
+ }
+ static address pd_call_destination(address branch) {
+ return target_addr_for_insn(branch);
+ }
+
+ static int patch_oop(address insn_addr, address o);
+ address emit_trampoline_stub(int insts_call_instruction_offset, address target);
+ void emit_static_call_stub();
+
+ // The following 4 methods return the offset of the appropriate move instruction
+
+ // Support for fast byte/short loading with zero extension (depending on particular CPU)
+ int load_unsigned_byte(Register dst, Address src);
+ int load_unsigned_short(Register dst, Address src);
+
+ // Support for fast byte/short loading with sign extension (depending on particular CPU)
+ int load_signed_byte(Register dst, Address src);
+ int load_signed_short(Register dst, Address src);
+
+ // Load and store values by size and signed-ness
+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
+
+ public:
+ // Standard pseudoinstruction
+ void nop();
+ void mv(Register Rd, Register Rs);
+ void notr(Register Rd, Register Rs);
+ void neg(Register Rd, Register Rs);
+ void negw(Register Rd, Register Rs);
+ void sext_w(Register Rd, Register Rs);
+ void zext_b(Register Rd, Register Rs);
+ void seqz(Register Rd, Register Rs); // set if = zero
+ void snez(Register Rd, Register Rs); // set if != zero
+ void sltz(Register Rd, Register Rs); // set if < zero
+ void sgtz(Register Rd, Register Rs); // set if > zero
+
+ // Float pseudoinstruction
+ void fmv_s(FloatRegister Rd, FloatRegister Rs);
+ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value
+ void fneg_s(FloatRegister Rd, FloatRegister Rs);
+
+ // Double pseudoinstruction
+ void fmv_d(FloatRegister Rd, FloatRegister Rs);
+ void fabs_d(FloatRegister Rd, FloatRegister Rs);
+ void fneg_d(FloatRegister Rd, FloatRegister Rs);
+
+ // Pseudoinstruction for control and status register
+ void rdinstret(Register Rd); // read instruction-retired counter
+ void rdcycle(Register Rd); // read cycle counter
+ void rdtime(Register Rd); // read time
+ void csrr(Register Rd, unsigned csr); // read csr
+ void csrw(unsigned csr, Register Rs); // write csr
+ void csrs(unsigned csr, Register Rs); // set bits in csr
+ void csrc(unsigned csr, Register Rs); // clear bits in csr
+ void csrwi(unsigned csr, unsigned imm);
+ void csrsi(unsigned csr, unsigned imm);
+ void csrci(unsigned csr, unsigned imm);
+ void frcsr(Register Rd); // read floating-point csr
+ void fscsr(Register Rd, Register Rs); // swap floating-point csr
+ void fscsr(Register Rs); // write floating-point csr
+ void frrm(Register Rd); // read floating-point rounding mode
+ void fsrm(Register Rd, Register Rs); // swap floating-point rounding mode
+ void fsrm(Register Rs); // write floating-point rounding mode
+ void fsrmi(Register Rd, unsigned imm);
+ void fsrmi(unsigned imm);
+ void frflags(Register Rd); // read floating-point exception flags
+ void fsflags(Register Rd, Register Rs); // swap floating-point exception flags
+ void fsflags(Register Rs); // write floating-point exception flags
+ void fsflagsi(Register Rd, unsigned imm);
+ void fsflagsi(unsigned imm);
+
+ void beqz(Register Rs, const address &dest);
+ void bnez(Register Rs, const address &dest);
+ void blez(Register Rs, const address &dest);
+ void bgez(Register Rs, const address &dest);
+ void bltz(Register Rs, const address &dest);
+ void bgtz(Register Rs, const address &dest);
+ void la(Register Rd, Label &label);
+ void la(Register Rd, const address &dest);
+ void la(Register Rd, const Address &adr);
+ // label
+ void beqz(Register Rs, Label &l, bool is_far = false);
+ void bnez(Register Rs, Label &l, bool is_far = false);
+ void blez(Register Rs, Label &l, bool is_far = false);
+ void bgez(Register Rs, Label &l, bool is_far = false);
+ void bltz(Register Rs, Label &l, bool is_far = false);
+ void bgtz(Register Rs, Label &l, bool is_far = false);
+ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+
+ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
+ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
+ void push_reg(Register Rs);
+ void pop_reg(Register Rd);
+ int push_reg(unsigned int bitset, Register stack);
+ int pop_reg(unsigned int bitset, Register stack);
+ void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
+ void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
+#ifdef COMPILER2
+ void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); }
+ void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); }
+#endif // COMPILER2
+
+ // Push and pop everything that might be clobbered by a native
+ // runtime call except t0 and t1. (They are always
+ // temporary registers, so we don't have to protect them.)
+ // Additional registers can be excluded in a passed RegSet.
+ void push_call_clobbered_registers_except(RegSet exclude);
+ void pop_call_clobbered_registers_except(RegSet exclude);
+
+ void push_call_clobbered_registers() {
+ push_call_clobbered_registers_except(RegSet());
+ }
+ void pop_call_clobbered_registers() {
+ pop_call_clobbered_registers_except(RegSet());
+ }
+
+ void pusha();
+ void popa();
+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
+
+ // if heap base register is used - reinit it with the correct value
+ void reinit_heapbase();
+
+ void bind(Label& L) {
+ Assembler::bind(L);
+ // fences across basic blocks should not be merged
+ code()->clear_last_insn();
+ }
+
+ // mv
+ template<typename T, ENABLE_IF(std::is_integral<T>::value)>
+ inline void mv(Register Rd, T o) {
+ li(Rd, (int64_t)o);
+ }
+
+ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }
+
+ void mv(Register Rd, Address dest);
+ void mv(Register Rd, address addr);
+ void mv(Register Rd, RegisterOrConstant src);
+
+ // logic
+ void andrw(Register Rd, Register Rs1, Register Rs2);
+ void orrw(Register Rd, Register Rs1, Register Rs2);
+ void xorrw(Register Rd, Register Rs1, Register Rs2);
+
+ // revb
+ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend
+ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend
+ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend
+ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend
+ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower
+ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword
+ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word
+ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword
+
+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
+ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
+ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
+
+ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
+ void cmpxchg(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result, bool result_as_bool = false);
+ void cmpxchg_weak(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result);
+ void cmpxchg_narrow_value_helper(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Register tmp1, Register tmp2, Register tmp3);
+ void cmpxchg_narrow_value(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result, bool result_as_bool,
+ Register tmp1, Register tmp2, Register tmp3);
+ void weak_cmpxchg_narrow_value(Register addr, Register expected,
+ Register new_val,
+ enum operand_size size,
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
+ Register result,
+ Register tmp1, Register tmp2, Register tmp3);
+
+ void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
+ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
+ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
+ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
+
+ void atomic_xchg(Register prev, Register newv, Register addr);
+ void atomic_xchgw(Register prev, Register newv, Register addr);
+ void atomic_xchgal(Register prev, Register newv, Register addr);
+ void atomic_xchgalw(Register prev, Register newv, Register addr);
+ void atomic_xchgwu(Register prev, Register newv, Register addr);
+ void atomic_xchgalwu(Register prev, Register newv, Register addr);
+
+ static bool far_branches() {
+ return ReservedCodeCacheSize > branch_range;
+ }
+
+ // Jumps that can reach anywhere in the code cache.
+ // Trashes tmp.
+ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
+ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
+
+ static int far_branch_size() {
+ if (far_branches()) {
+ return 2 * 4; // auipc + jalr, see far_call() & far_jump()
+ } else {
+ return 4;
+ }
+ }
+
+ void load_byte_map_base(Register reg);
+
+ void bang_stack_with_offset(int offset) {
+ // stack grows down, caller passes positive offset
+ assert(offset > 0, "must bang with negative offset");
+ sub(t0, sp, offset);
+ sd(zr, Address(t0));
+ }
+
+ void la_patchable(Register reg1, const Address &dest, int32_t &offset);
+
+ virtual void _call_Unimplemented(address call_site) {
+ mv(t1, call_site);
+ }
+
+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
+
+ // Frame creation and destruction shared between JITs.
+ void build_frame(int framesize);
+ void remove_frame(int framesize);
+
+ void reserved_stack_check();
+
+ void get_polling_page(Register dest, relocInfo::relocType rtype);
+ address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
+
+ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
+ address ic_call(address entry, jint method_index = 0);
+
+ void add_memory_int64(const Address dst, int64_t imm);
+ void add_memory_int32(const Address dst, int32_t imm);
+
+ void cmpptr(Register src1, Address src2, Label& equal);
+
+ void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL);
+ void load_method_holder_cld(Register result, Register method);
+ void load_method_holder(Register holder, Register method);
+
+ void compute_index(Register str1, Register trailing_zeros, Register match_mask,
+ Register result, Register char_tmp, Register tmp,
+ bool haystack_isL);
+ void compute_match_mask(Register src, Register pattern, Register match_mask,
+ Register mask1, Register mask2);
+
+#ifdef COMPILER2
+ void mul_add(Register out, Register in, Register offset,
+ Register len, Register k, Register tmp);
+ void cad(Register dst, Register src1, Register src2, Register carry);
+ void cadc(Register dst, Register src1, Register src2, Register carry);
+ void adc(Register dst, Register src1, Register src2, Register carry);
+ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
+ Register src1, Register src2, Register carry);
+ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
+ Register y, Register y_idx, Register z,
+ Register carry, Register product,
+ Register idx, Register kdx);
+ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+ Register y, Register y_idx, Register z,
+ Register carry, Register product,
+ Register idx, Register kdx);
+ void multiply_128_x_128_loop(Register y, Register z,
+ Register carry, Register carry2,
+ Register idx, Register jdx,
+ Register yz_idx1, Register yz_idx2,
+ Register tmp, Register tmp3, Register tmp4,
+ Register tmp6, Register product_hi);
+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
+ Register z, Register zlen,
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+ Register tmp5, Register tmp6, Register product_hi);
+#endif
+
+ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
+ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
+
+ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
+
+ void zero_words(Register base, u_int64_t cnt);
+ address zero_words(Register ptr, Register cnt);
+ void fill_words(Register base, Register cnt, Register value);
+ void zero_memory(Register addr, Register len, Register tmp);
+
+ // shift left by shamt and add
+ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
+
+  // Float conversion instructions that deal safely with special inputs.
+  // e.g. converting NaN, +Inf or -Inf to int/long would otherwise trigger
+  // an exception or give an unexpected result, so these helpers handle such
+  // inputs explicitly to produce the correct results.
+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
+ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
+ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
+ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
+
+ // vector load/store unit-stride instructions
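+  // vlex_v/vsex_v dispatch to the vle{8,16,32,64}.v / vse{8,16,32,64}.v form
+  // matching the element width (sew).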
+ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
+ switch (sew) {
+ case Assembler::e64:
+ vle64_v(vd, base, vm);
+ break;
+ case Assembler::e32:
+ vle32_v(vd, base, vm);
+ break;
+ case Assembler::e16:
+ vle16_v(vd, base, vm);
+ break;
+ case Assembler::e8: // fall through
+ default:
+ vle8_v(vd, base, vm);
+ break;
+ }
+ }
+
+ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
+ switch (sew) {
+ case Assembler::e64:
+ vse64_v(store_data, base, vm);
+ break;
+ case Assembler::e32:
+ vse32_v(store_data, base, vm);
+ break;
+ case Assembler::e16:
+ vse16_v(store_data, base, vm);
+ break;
+ case Assembler::e8: // fall through
+ default:
+ vse8_v(store_data, base, vm);
+ break;
+ }
+ }
+
+ static const int zero_words_block_size;
+
+ void cast_primitive_type(BasicType type, Register Rt) {
+ switch (type) {
+ case T_BOOLEAN:
+ sltu(Rt, zr, Rt);
+ break;
+ case T_CHAR :
+ zero_extend(Rt, Rt, 16);
+ break;
+ case T_BYTE :
+ sign_extend(Rt, Rt, 8);
+ break;
+ case T_SHORT :
+ sign_extend(Rt, Rt, 16);
+ break;
+ case T_INT :
+        addw(Rt, Rt, zr); // addw sign-extends the low 32 bits
+ break;
+ case T_LONG : /* nothing to do */ break;
+ case T_VOID : /* nothing to do */ break;
+ case T_FLOAT : /* nothing to do */ break;
+ case T_DOUBLE : /* nothing to do */ break;
+ default: ShouldNotReachHere();
+ }
+ }
+
+  // float cmp; unordered_result (-1 or +1) is returned when either input is NaN
+ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
+ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
+
+ // Zero/Sign-extend
+ void zero_extend(Register dst, Register src, int bits);
+ void sign_extend(Register dst, Register src, int bits);
+
+ // compare src1 and src2 and get -1/0/1 in dst.
+ // if [src1 > src2], dst = 1;
+ // if [src1 == src2], dst = 0;
+ // if [src1 < src2], dst = -1;
+ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
+
+ int push_fp(unsigned int bitset, Register stack);
+ int pop_fp(unsigned int bitset, Register stack);
+
+ int push_vp(unsigned int bitset, Register stack);
+ int pop_vp(unsigned int bitset, Register stack);
+
+ // vext
+ void vmnot_m(VectorRegister vd, VectorRegister vs);
+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
+ void vfneg_v(VectorRegister vd, VectorRegister vs);
+
+private:
+
+#ifdef ASSERT
+ // Template short-hand support to clean-up after a failed call to trampoline
+ // call generation (see trampoline_call() below), when a set of Labels must
+ // be reset (before returning).
+  template<typename Label, typename... More>
+ void reset_labels(Label& lbl, More&... more) {
+ lbl.reset(); reset_labels(more...);
+ }
+  template<typename Label>
+ void reset_labels(Label& lbl) {
+ lbl.reset();
+ }
+#endif
+ void repne_scan(Register addr, Register value, Register count, Register tmp);
+
+ // Return true if an address is within the 48-bit RISCV64 address space.
+ bool is_valid_riscv64_address(address addr) {
+ return ((uintptr_t)addr >> 48) == 0;
+ }
+
+ void ld_constant(Register dest, const Address &const_addr) {
+ if (NearCpool) {
+ ld(dest, const_addr);
+ } else {
+ int32_t offset = 0;
+ la_patchable(dest, InternalAddress(const_addr.target()), offset);
+ ld(dest, Address(dest, offset));
+ }
+ }
+
+ int bitset_to_regs(unsigned int bitset, unsigned char* regs);
+ Address add_memory_helper(const Address dst);
+
+ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
+ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);
+
+ // Check the current thread doesn't need a cross modify fence.
+ void verify_cross_modify_fence_not_required() PRODUCT_RETURN;
+};
+
+#ifdef ASSERT
+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which will be checked at run-time.
+ */
+class SkipIfEqual {
+ private:
+ MacroAssembler* _masm;
+ Label _label;
+
+ public:
+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
+ ~SkipIfEqual();
+};
+
+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef968ccd96d98b42916980d78a78bbb73e8e8ddf
--- /dev/null
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
+
+// Still empty.
+
+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..23a75d20502609871250ede91d637c8f206ef4e2
--- /dev/null
+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_MATCHER_RISCV_HPP
+#define CPU_RISCV_MATCHER_RISCV_HPP
+
+ // Defined within class Matcher
+
+ // false => size gets scaled to BytesPerLong, ok.
+ static const bool init_array_count_is_in_bytes = false;
+
+ // Whether this platform implements the scalable vector feature
+ static const bool implements_scalable_vector = true;
+
+ static const bool supports_scalable_vector() {
+ return UseRVV;
+ }
+
+  // riscv supports misaligned vector loads and stores.
+ static constexpr bool misaligned_vectors_ok() {
+ return true;
+ }
+
+  // Whether code generation needs accurate ConvI2L types.
+ static const bool convi2l_type_required = false;
+
+ // Does the CPU require late expand (see block.cpp for description of late expand)?
+ static const bool require_postalloc_expand = false;
+
+ // Do we need to mask the count passed to shift instructions or does
+ // the cpu only look at the lower 5/6 bits anyway?
+ static const bool need_masked_shift_count = false;
+
+ // No support for generic vector operands.
+ static const bool supports_generic_vector_operands = false;
+
+ static constexpr bool isSimpleConstant64(jlong value) {
+    // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
+ // Probably always true, even if a temp register is required.
+ return true;
+ }
+
+ // Use conditional move (CMOVL)
+ static constexpr int long_cmove_cost() {
+ // long cmoves are no more expensive than int cmoves
+ return 0;
+ }
+
+ static constexpr int float_cmove_cost() {
+ // float cmoves are no more expensive than int cmoves
+ return 0;
+ }
+
+ // This affects two different things:
+ // - how Decode nodes are matched
+ // - how ImplicitNullCheck opportunities are recognized
+ // If true, the matcher will try to remove all Decodes and match them
+ // (as operands) into nodes. NullChecks are not prepared to deal with
+ // Decodes by final_graph_reshaping().
+ // If false, final_graph_reshaping() forces the decode behind the Cmp
+ // for a NullCheck. The matcher matches the Decode node into a register.
+ // Implicit_null_check optimization moves the Decode along with the
+ // memory operation back up before the NullCheck.
+ static bool narrow_oop_use_complex_address() {
+ return CompressedOops::shift() == 0;
+ }
+
+ static bool narrow_klass_use_complex_address() {
+ return false;
+ }
+
+ static bool const_oop_prefer_decode() {
+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
+ return CompressedOops::base() == NULL;
+ }
+
+ static bool const_klass_prefer_decode() {
+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
+ return CompressedKlassPointers::base() == NULL;
+ }
+
+ // Is it better to copy float constants, or load them directly from
+ // memory? Intel can load a float constant from a direct address,
+ // requiring no extra registers. Most RISCs will have to materialize
+ // an address into a register first, so they would do better to copy
+ // the constant from stack.
+ static const bool rematerialize_float_constants = false;
+
+ // If CPU can load and store mis-aligned doubles directly then no
+ // fixup is needed. Else we split the double into 2 integer pieces
+ // and move it piece-by-piece. Only happens when passing doubles into
+ // C code as the Java calling convention forces doubles to be aligned.
+ static const bool misaligned_doubles_ok = true;
+
+ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
+ static const bool strict_fp_requires_explicit_rounding = false;
+
+ // Are floats converted to double when stored to stack during
+ // deoptimization?
+ static constexpr bool float_in_double() { return false; }
+
+ // Do ints take an entire long register or just half?
+ // The relevant question is how the int is callee-saved:
+ // the whole long is written but de-opt'ing will have to extract
+ // the relevant 32 bits.
+ static const bool int_in_long = true;
+
+  // Does the CPU support vector variable shift instructions?
+ static constexpr bool supports_vector_variable_shifts(void) {
+ return false;
+ }
+
+  // Does the CPU support vector variable rotate instructions?
+ static constexpr bool supports_vector_variable_rotates(void) {
+ return false;
+ }
+
+  // Does the CPU support vector constant rotate instructions?
+ static constexpr bool supports_vector_constant_rotates(int shift) {
+ return false;
+ }
+
+  // Does the CPU support vector unsigned comparison instructions?
+ static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
+ return false;
+ }
+
+ // Some microarchitectures have mask registers used on vectors
+ static const bool has_predicated_vectors(void) {
+ return false;
+ }
+
+  // true means we have fast l2f conversion
+ // false means that conversion is done by runtime call
+ static constexpr bool convL2FSupported(void) {
+ return true;
+ }
+
+  // Implements a variant of EncodeISOArrayNode that encodes ASCII only
+ static const bool supports_encode_ascii_array = false;
+
+ // Returns pre-selection estimated size of a vector operation.
+ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
+ return 0;
+ }
+
+#endif // CPU_RISCV_MATCHER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f7c0c87c216da57fcbf35ba4da78b031ca84f03
--- /dev/null
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.inline.hpp"
+#include "classfile/vmClasses.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/flags/flagSetting.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {
+ assert_cond(_masm != NULL);
+ if (VerifyMethodHandles) {
+ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class),
+ "MH argument is a Class");
+ }
+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset()));
+}
+
+#ifdef ASSERT
+static int check_nonzero(const char* xname, int x) {
+ assert(x != 0, "%s should be nonzero", xname);
+ return x;
+}
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
+
+#ifdef ASSERT
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+ Register obj, vmClassID klass_id,
+ const char* error_message) {
+ assert_cond(_masm != NULL);
+ InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id);
+ Klass* klass = vmClasses::klass_at(klass_id);
+ Register temp = t1;
+ Register temp2 = t0; // used by MacroAssembler::cmpptr
+ Label L_ok, L_bad;
+ BLOCK_COMMENT("verify_klass {");
+ __ verify_oop(obj);
+ __ beqz(obj, L_bad);
+ __ push_reg(RegSet::of(temp, temp2), sp);
+ __ load_klass(temp, obj);
+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok);
+ intptr_t super_check_offset = klass->super_check_offset();
+ __ ld(temp, Address(temp, super_check_offset));
+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok);
+ __ pop_reg(RegSet::of(temp, temp2), sp);
+ __ bind(L_bad);
+ __ stop(error_message);
+ __ BIND(L_ok);
+ __ pop_reg(RegSet::of(temp, temp2), sp);
+ BLOCK_COMMENT("} verify_klass");
+}
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {}
+
+#endif //ASSERT
+
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+ bool for_compiler_entry) {
+ assert_cond(_masm != NULL);
+ assert(method == xmethod, "interpreter calling convention");
+ Label L_no_such_method;
+ __ beqz(xmethod, L_no_such_method);
+ __ verify_method_ptr(method);
+
+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+ Label run_compiled_code;
+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+ // compiled code in threads for which the event is enabled. Check here for
+ // interp_only_mode if these events CAN be enabled.
+
+ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
+ __ beqz(t0, run_compiled_code);
+ __ ld(t0, Address(method, Method::interpreter_entry_offset()));
+ __ jr(t0);
+ __ BIND(run_compiled_code);
+ }
+
+ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() :
+ Method::from_interpreted_offset();
+  __ ld(t0, Address(method, entry_offset));
+ __ jr(t0);
+ __ bind(L_no_such_method);
+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
+}
+
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+ Register recv, Register method_temp,
+ Register temp2,
+ bool for_compiler_entry) {
+ assert_cond(_masm != NULL);
+ BLOCK_COMMENT("jump_to_lambda_form {");
+ // This is the initial entry point of a lazy method handle.
+ // After type checking, it picks up the invoker from the LambdaForm.
+ assert_different_registers(recv, method_temp, temp2);
+ assert(recv != noreg, "required register");
+ assert(method_temp == xmethod, "required register for loading method");
+
+ // Load the invoker, as MH -> MH.form -> LF.vmentry
+ __ verify_oop(recv);
+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2);
+ __ verify_oop(method_temp);
+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2);
+ __ verify_oop(method_temp);
+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2);
+ __ verify_oop(method_temp);
+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg);
+
+ if (VerifyMethodHandles && !for_compiler_entry) {
+ // make sure recv is already on stack
+ __ ld(temp2, Address(method_temp, Method::const_offset()));
+ __ load_sized_value(temp2,
+ Address(temp2, ConstMethod::size_of_parameters_offset()),
+ sizeof(u2), /*is_signed*/ false);
+ Label L;
+ __ ld(t0, __ argument_address(temp2, -1));
+ __ beq(recv, t0, L);
+ __ ld(x10, __ argument_address(temp2, -1));
+ __ ebreak();
+ __ BIND(L);
+ }
+
+ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry);
+ BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
+// Code generation
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+ vmIntrinsics::ID iid) {
+ assert_cond(_masm != NULL);
+ const bool not_for_compiler_entry = false; // this is the interpreter entry
+ assert(is_signature_polymorphic(iid), "expected invoke iid");
+ if (iid == vmIntrinsics::_invokeGeneric ||
+ iid == vmIntrinsics::_compiledLambdaForm) {
+ // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+ // They all allow an appendix argument.
+ __ ebreak(); // empty stubs make SG sick
+ return NULL;
+ }
+
+ // No need in interpreter entry for linkToNative for now.
+ // Interpreter calls compiled entry through i2c.
+ if (iid == vmIntrinsics::_linkToNative) {
+ __ ebreak();
+ return NULL;
+ }
+
+ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+ // xmethod: Method*
+ // x13: argument locator (parameter slot count, added to sp)
+ // x11: used as temp to hold mh or receiver
+ // x10, x29: garbage temps, blown away
+ Register argp = x13; // argument list ptr, live on error paths
+ Register mh = x11; // MH receiver; dies quickly and is recycled
+
+ // here's where control starts out:
+ __ align(CodeEntryAlignment);
+ address entry_point = __ pc();
+
+ if (VerifyMethodHandles) {
+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
+
+ Label L;
+ BLOCK_COMMENT("verify_intrinsic_id {");
+ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes()));
+ __ mv(t1, (int) iid);
+ __ beq(t0, t1, L);
+ if (iid == vmIntrinsics::_linkToVirtual ||
+ iid == vmIntrinsics::_linkToSpecial) {
+ // could do this for all kinds, but would explode assembly code size
+ trace_method_handle(_masm, "bad Method*::intrinsic_id");
+ }
+ __ ebreak();
+ __ bind(L);
+ BLOCK_COMMENT("} verify_intrinsic_id");
+ }
+
+ // First task: Find out how big the argument list is.
+ Address x13_first_arg_addr;
+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+ __ ld(argp, Address(xmethod, Method::const_offset()));
+ __ load_sized_value(argp,
+ Address(argp, ConstMethod::size_of_parameters_offset()),
+ sizeof(u2), /*is_signed*/ false);
+ x13_first_arg_addr = __ argument_address(argp, -1);
+ } else {
+ DEBUG_ONLY(argp = noreg);
+ }
+
+ if (!is_signature_polymorphic_static(iid)) {
+ __ ld(mh, x13_first_arg_addr);
+ DEBUG_ONLY(argp = noreg);
+ }
+
+ // x13_first_arg_addr is live!
+
+ trace_method_handle_interpreter_entry(_masm, iid);
+ if (iid == vmIntrinsics::_invokeBasic) {
+ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry);
+ } else {
+ // Adjust argument list by popping the trailing MemberName argument.
+ Register recv = noreg;
+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
+ __ ld(recv = x12, x13_first_arg_addr);
+ }
+ DEBUG_ONLY(argp = noreg);
+ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now
+ __ pop_reg(xmember); // extract last argument
+ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry);
+ }
+
+ return entry_point;
+}
+
+
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+ vmIntrinsics::ID iid,
+ Register receiver_reg,
+ Register member_reg,
+ bool for_compiler_entry) {
+ assert_cond(_masm != NULL);
+ assert(is_signature_polymorphic(iid), "expected invoke iid");
+ // temps used in this code are not used in *either* compiled or interpreted calling sequences
+ Register temp1 = x7;
+ Register temp2 = x28;
+ Register temp3 = x29; // x30 is live by this point: it contains the sender SP
+ if (for_compiler_entry) {
+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
+ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+ }
+
+ assert_different_registers(temp1, temp2, temp3, receiver_reg);
+ assert_different_registers(temp1, temp2, temp3, member_reg);
+
+ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) {
+ if (iid == vmIntrinsics::_linkToNative) {
+ assert(for_compiler_entry, "only compiler entry is supported");
+ }
+ // indirect through MH.form.vmentry.vmtarget
+ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry);
+ } else {
+ // The method is a member invoker used by direct method handles.
+ if (VerifyMethodHandles) {
+ // make sure the trailing argument really is a MemberName (caller responsibility)
+ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName),
+ "MemberName required for invokeVirtual etc.");
+ }
+
+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset()));
+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset()));
+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset()));
+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset()));
+
+ Register temp1_recv_klass = temp1;
+ if (iid != vmIntrinsics::_linkToStatic) {
+ __ verify_oop(receiver_reg);
+ if (iid == vmIntrinsics::_linkToSpecial) {
+ // Don't actually load the klass; just null-check the receiver.
+ __ null_check(receiver_reg);
+ } else {
+ // load receiver klass itself
+ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
+ __ load_klass(temp1_recv_klass, receiver_reg);
+ __ verify_klass_ptr(temp1_recv_klass);
+ }
+ BLOCK_COMMENT("check_receiver {");
+ // The receiver for the MemberName must be in receiver_reg.
+ // Check the receiver against the MemberName.clazz
+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+ // Did not load it above...
+ __ load_klass(temp1_recv_klass, receiver_reg);
+ __ verify_klass_ptr(temp1_recv_klass);
+ }
+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+ Label L_ok;
+ Register temp2_defc = temp2;
+ __ load_heap_oop(temp2_defc, member_clazz, temp3);
+ load_klass_from_Class(_masm, temp2_defc);
+ __ verify_klass_ptr(temp2_defc);
+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok);
+ // If we get here, the type check failed!
+ __ ebreak();
+ __ bind(L_ok);
+ }
+ BLOCK_COMMENT("} check_receiver");
+ }
+ if (iid == vmIntrinsics::_linkToSpecial ||
+ iid == vmIntrinsics::_linkToStatic) {
+ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass
+ }
+
+ // Live registers at this point:
+ // member_reg - MemberName that was the trailing argument
+ // temp1_recv_klass - klass of stacked receiver, if needed
+ // x30 - interpreter linkage (if interpreted)
+ // x11 ... x10 - compiler arguments (if compiled)
+
+ Label L_incompatible_class_change_error;
+ switch (iid) {
+ case vmIntrinsics::_linkToSpecial:
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+ }
+ __ load_heap_oop(xmethod, member_vmtarget);
+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg);
+ break;
+
+ case vmIntrinsics::_linkToStatic:
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+ }
+ __ load_heap_oop(xmethod, member_vmtarget);
+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg);
+ break;
+
+ case vmIntrinsics::_linkToVirtual:
+ {
+ // same as TemplateTable::invokevirtual,
+ // minus the CP setup and profiling:
+
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+ }
+
+ // pick out the vtable index from the MemberName, and then we can discard it:
+ Register temp2_index = temp2;
+ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg);
+
+ if (VerifyMethodHandles) {
+ Label L_index_ok;
+ __ bgez(temp2_index, L_index_ok);
+ __ ebreak();
+ __ BIND(L_index_ok);
+ }
+
+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+ // at this point. And VerifyMethodHandles has already checked clazz, if needed.
+
+ // get target Method* & entry point
+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod);
+ break;
+ }
+
+ case vmIntrinsics::_linkToInterface:
+ {
+ // same as TemplateTable::invokeinterface
+ // (minus the CP setup and profiling, with different argument motion)
+ if (VerifyMethodHandles) {
+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+ }
+
+ Register temp3_intf = temp3;
+ __ load_heap_oop(temp3_intf, member_clazz);
+ load_klass_from_Class(_masm, temp3_intf);
+ __ verify_klass_ptr(temp3_intf);
+
+ Register rindex = xmethod;
+ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg);
+ if (VerifyMethodHandles) {
+ Label L;
+ __ bgez(rindex, L);
+ __ ebreak();
+ __ bind(L);
+ }
+
+ // given intf, index, and recv klass, dispatch to the implementation method
+ __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+ // note: next two args must be the same:
+ rindex, xmethod,
+ temp2,
+ L_incompatible_class_change_error);
+ break;
+ }
+
+ default:
+ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid));
+ break;
+ }
+
+ // live at this point: xmethod, x30 (if interpreted)
+
+ // After figuring out which concrete method to call, jump into it.
+ // Note that this works in the interpreter with no data motion.
+    // But the compiled version will require that the receiver register be shifted out.
+ __ verify_method_ptr(xmethod);
+ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry);
+ if (iid == vmIntrinsics::_linkToInterface) {
+ __ bind(L_incompatible_class_change_error);
+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+ }
+ }
+
+}
+
+#ifndef PRODUCT
+void trace_method_handle_stub(const char* adaptername,
+ oopDesc* mh,
+ intptr_t* saved_regs,
+ intptr_t* entry_sp) { }
+
+// The stub wraps the arguments in a struct on the stack to avoid
+// dealing with the different calling conventions for passing 6
+// arguments.
+struct MethodHandleStubArguments {
+ const char* adaptername;
+ oopDesc* mh;
+ intptr_t* saved_regs;
+ intptr_t* entry_sp;
+};
+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { }
+
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { }
+#endif //PRODUCT
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f73aba29d6750db8693eecd8100e5f2d1e85fc08
--- /dev/null
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+// Adapters
+enum /* platform_dependent_constants */ {
+ adapter_code_size = 32000 DEBUG_ONLY(+ 120000)
+};
+
+public:
+
+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg);
+
+ static void verify_klass(MacroAssembler* _masm,
+ Register obj, vmClassID klass_id,
+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) {
+ verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle),
+ "reference is a MH");
+ }
+
+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
+ // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+ // Takes care of special dispatch from single stepping too.
+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+ bool for_compiler_entry);
+
+ static void jump_to_lambda_form(MacroAssembler* _masm,
+ Register recv, Register method_temp,
+ Register temp2,
+ bool for_compiler_entry);
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a05c5778607fb8e4cc6cba503db584197fe7537
--- /dev/null
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
@@ -0,0 +1,429 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "code/compiledIC.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/orderAccess.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+Register NativeInstruction::extract_rs1(address instr) {
+ assert_cond(instr != NULL);
+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15));
+}
+
+Register NativeInstruction::extract_rs2(address instr) {
+ assert_cond(instr != NULL);
+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20));
+}
+
+Register NativeInstruction::extract_rd(address instr) {
+ assert_cond(instr != NULL);
+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7));
+}
+
+uint32_t NativeInstruction::extract_opcode(address instr) {
+ assert_cond(instr != NULL);
+ return Assembler::extract(((unsigned*)instr)[0], 6, 0);
+}
+
+uint32_t NativeInstruction::extract_funct3(address instr) {
+ assert_cond(instr != NULL);
+ return Assembler::extract(((unsigned*)instr)[0], 14, 12);
+}
+
+bool NativeInstruction::is_pc_relative_at(address instr) {
+ // auipc + jalr
+ // auipc + addi
+ // auipc + load
+  //   auipc + float_load
+ return (is_auipc_at(instr)) &&
+ (is_addi_at(instr + instruction_size) ||
+ is_jalr_at(instr + instruction_size) ||
+ is_load_at(instr + instruction_size) ||
+ is_float_load_at(instr + instruction_size)) &&
+ check_pc_relative_data_dependency(instr);
+}
+
+// i.e. ld(Rd, Label)
+bool NativeInstruction::is_load_pc_relative_at(address instr) {
+ return is_auipc_at(instr) && // auipc
+ is_ld_at(instr + instruction_size) && // ld
+ check_load_pc_relative_data_dependency(instr);
+}
+
+bool NativeInstruction::is_movptr_at(address instr) {
+ return is_lui_at(instr) && // Lui
+ is_addi_at(instr + instruction_size) && // Addi
+ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11
+ is_addi_at(instr + instruction_size * 3) && // Addi
+ is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5
+ (is_addi_at(instr + instruction_size * 5) ||
+ is_jalr_at(instr + instruction_size * 5) ||
+ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load
+ check_movptr_data_dependency(instr);
+}
+
+bool NativeInstruction::is_li32_at(address instr) {
+ return is_lui_at(instr) && // lui
+ is_addiw_at(instr + instruction_size) && // addiw
+ check_li32_data_dependency(instr);
+}
+
+bool NativeInstruction::is_li64_at(address instr) {
+ return is_lui_at(instr) && // lui
+ is_addi_at(instr + instruction_size) && // addi
+ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12
+ is_addi_at(instr + instruction_size * 3) && // addi
+ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12
+ is_addi_at(instr + instruction_size * 5) && // addi
+ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8
+ is_addi_at(instr + instruction_size * 7) && // addi
+ check_li64_data_dependency(instr);
+}
+
+void NativeCall::verify() {
+ assert(NativeCall::is_call_at((address)this), "unexpected code at call site");
+}
+
+address NativeCall::destination() const {
+ address addr = (address)this;
+ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal.");
+ address destination = MacroAssembler::target_addr_for_insn(instruction_address());
+
+ // Do we use a trampoline stub for this call?
+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie.
+ assert(cb && cb->is_nmethod(), "sanity");
+ nmethod *nm = (nmethod *)cb;
+ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
+ // Yes we do, so get the destination from the trampoline stub.
+ const address trampoline_stub_addr = destination;
+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination();
+ }
+
+ return destination;
+}
+
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+//
+// Add parameter assert_lock to switch off assertion
+// during code generation, where no patching lock is needed.
+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
+ assert(!assert_lock ||
+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) ||
+ CompiledICLocker::is_safe(addr_at(0)),
+ "concurrent code patching");
+
+ ResourceMark rm;
+ address addr_call = addr_at(0);
+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site");
+
+ // Patch the constant in the call's trampoline stub.
+ address trampoline_stub_addr = get_trampoline();
+ if (trampoline_stub_addr != NULL) {
+ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines");
+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest);
+ }
+
+ // Patch the call.
+ if (Assembler::reachable_from_branch_at(addr_call, dest)) {
+ set_destination(dest);
+ } else {
+ assert (trampoline_stub_addr != NULL, "we need a trampoline");
+ set_destination(trampoline_stub_addr);
+ }
+
+ ICache::invalidate_range(addr_call, instruction_size);
+}
+
+address NativeCall::get_trampoline() {
+ address call_addr = addr_at(0);
+
+ CodeBlob *code = CodeCache::find_blob(call_addr);
+ assert(code != NULL, "Could not find the containing code blob");
+
+ address jal_destination = MacroAssembler::pd_call_destination(call_addr);
+ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) {
+ return jal_destination;
+ }
+
+ if (code != NULL && code->is_nmethod()) {
+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
+ }
+
+ return NULL;
+}
+
+// Inserts a native call instruction at a given pc
+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); }
+
+//-------------------------------------------------------------------
+
+void NativeMovConstReg::verify() {
+ if (!(nativeInstruction_at(instruction_address())->is_movptr() ||
+ is_auipc_at(instruction_address()))) {
+ fatal("should be MOVPTR or AUIPC");
+ }
+}
+
+intptr_t NativeMovConstReg::data() const {
+ address addr = MacroAssembler::target_addr_for_insn(instruction_address());
+ if (maybe_cpool_ref(instruction_address())) {
+ return *(intptr_t*)addr;
+ } else {
+ return (intptr_t)addr;
+ }
+}
+
+void NativeMovConstReg::set_data(intptr_t x) {
+ if (maybe_cpool_ref(instruction_address())) {
+ address addr = MacroAssembler::target_addr_for_insn(instruction_address());
+ *(intptr_t*)addr = x;
+ } else {
+ // Store x into the instruction stream.
+ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x);
+ ICache::invalidate_range(instruction_address(), movptr_instruction_size);
+ }
+
+ // Find and replace the oop/metadata corresponding to this
+ // instruction in oops section.
+ CodeBlob* cb = CodeCache::find_blob(instruction_address());
+ nmethod* nm = cb->as_nmethod_or_null();
+ if (nm != NULL) {
+ RelocIterator iter(nm, instruction_address(), next_instruction_address());
+ while (iter.next()) {
+ if (iter.type() == relocInfo::oop_type) {
+ oop* oop_addr = iter.oop_reloc()->oop_addr();
+ *oop_addr = cast_to_oop(x);
+ break;
+ } else if (iter.type() == relocInfo::metadata_type) {
+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr();
+ *metadata_addr = (Metadata*)x;
+ break;
+ }
+ }
+ }
+}
+
+void NativeMovConstReg::print() {
+ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
+ p2i(instruction_address()), data());
+}
+
+//-------------------------------------------------------------------
+
+int NativeMovRegMem::offset() const {
+ Unimplemented();
+ return 0;
+}
+
+void NativeMovRegMem::set_offset(int x) { Unimplemented(); }
+
+void NativeMovRegMem::verify() {
+ Unimplemented();
+}
+
+//--------------------------------------------------------------------------------
+
+void NativeJump::verify() { }
+
+
+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) {
+}
+
+
+address NativeJump::jump_destination() const {
+ address dest = MacroAssembler::target_addr_for_insn(instruction_address());
+
+ // We use jump to self as the unresolved address which the inline
+ // cache code (and relocs) know about
+ // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0)
+  // i.e. jump to 0 when we need to leave space for a wide immediate
+ // load
+
+ // return -1 if jump to self or to 0
+ if ((dest == (address) this) || dest == 0) {
+ dest = (address) -1;
+ }
+
+ return dest;
+};
+
+void NativeJump::set_jump_destination(address dest) {
+ // We use jump to self as the unresolved address which the inline
+ // cache code (and relocs) know about
+ if (dest == (address) -1)
+ dest = instruction_address();
+
+ MacroAssembler::pd_patch_instruction(instruction_address(), dest);
+ ICache::invalidate_range(instruction_address(), instruction_size);
+}
+
+//-------------------------------------------------------------------
+
+address NativeGeneralJump::jump_destination() const {
+ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address());
+ address dest = (address) move->data();
+
+ // We use jump to self as the unresolved address which the inline
+ // cache code (and relocs) know about
+ // As a special case we also use jump to 0 when first generating
+ // a general jump
+
+ // return -1 if jump to self or to 0
+ if ((dest == (address) this) || dest == 0) {
+ dest = (address) -1;
+ }
+
+ return dest;
+}
+
+//-------------------------------------------------------------------
+
+bool NativeInstruction::is_safepoint_poll() {
+ return is_lwu_to_zr(address(this));
+}
+
+bool NativeInstruction::is_lwu_to_zr(address instr) {
+ assert_cond(instr != NULL);
+ return (extract_opcode(instr) == 0b0000011 &&
+ extract_funct3(instr) == 0b110 &&
+ extract_rd(instr) == zr); // zr
+}
+
+// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction.
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
+ // jvmci
+ return uint_at(0) == 0xffffffff;
+}
+
+void NativeIllegalInstruction::insert(address code_pos) {
+ assert_cond(code_pos != NULL);
+ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction
+}
+
+bool NativeInstruction::is_stop() {
+ return uint_at(0) == 0xffffffff; // an illegal instruction
+}
+
+//-------------------------------------------------------------------
+
+// MT-safe inserting of a jump over a jump or a nop (used by
+// nmethod::make_not_entrant_or_zombie)
+
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
+
+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
+
+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() ||
+ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(),
+ "riscv cannot replace non-jump with jump");
+
+ // Patch this nmethod atomically.
+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) {
+ ptrdiff_t offset = dest - verified_entry;
+    guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction."); // 1M
+
+ uint32_t insn = 0;
+ address pInsn = (address)&insn;
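+    // Encode "jal x0, offset": the J-type immediate imm[20|10:1|11|19:12] fills bits 31..12,
+    // rd (x0) goes into bits 11..7, and the opcode into bits 6..0.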
+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1);
+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff);
+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1);
+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff);
+ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump
+ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset)
+ *(unsigned int*)verified_entry = insn;
+ } else {
+ // We use an illegal instruction for marking a method as
+ // not_entrant or zombie.
+ NativeIllegalInstruction::insert(verified_entry);
+ }
+
+ ICache::invalidate_range(verified_entry, instruction_size);
+}
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
+ CodeBuffer cb(code_pos, instruction_size);
+ MacroAssembler a(&cb);
+
+ int32_t offset = 0;
+ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli
+ a.jalr(x0, t0, offset); // jalr
+
+ ICache::invalidate_range(code_pos, instruction_size);
+}
+
+// MT-safe patching of a long jump instruction.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+ ShouldNotCallThis();
+}
+
+
+address NativeCallTrampolineStub::destination(nmethod *nm) const {
+ return ptr_at(data_offset);
+}
+
+void NativeCallTrampolineStub::set_destination(address new_destination) {
+ set_ptr_at(data_offset, new_destination);
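+  // Publish the new destination before any subsequent patching of the call site becomes visible.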
+ OrderAccess::release();
+}
+
+uint32_t NativeMembar::get_kind() {
+ uint32_t insn = uint_at(0);
+
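+  // FENCE encoding: the predecessor set lives in bits 27..24, the successor set in bits 23..20.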
+ uint32_t predecessor = Assembler::extract(insn, 27, 24);
+ uint32_t successor = Assembler::extract(insn, 23, 20);
+
+ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor);
+}
+
+void NativeMembar::set_kind(uint32_t order_kind) {
+ uint32_t predecessor = 0;
+ uint32_t successor = 0;
+
+ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor);
+
+ uint32_t insn = uint_at(0);
+ address pInsn = (address) &insn;
+ Assembler::patch(pInsn, 27, 24, predecessor);
+ Assembler::patch(pInsn, 23, 20, successor);
+
+ address membar = addr_at(0);
+ *(unsigned int*) membar = insn;
+}
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..718b2e3de6c5acc37ca3a9717062afbe392edd4a
--- /dev/null
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
@@ -0,0 +1,572 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP
+#define CPU_RISCV_NATIVEINST_RISCV_HPP
+
+#include "asm/assembler.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+// We have interfaces for the following instructions:
+// - NativeInstruction
+// - - NativeCall
+// - - NativeMovConstReg
+// - - NativeMovRegMem
+// - - NativeJump
+// - - NativeGeneralJump
+// - - NativeIllegalInstruction
+// - - NativeCallTrampolineStub
+// - - NativeMembar
+// - - NativeFenceI
+
+// The base class for different kinds of native instruction abstractions.
+// Provides the primitive operations to manipulate code relative to this.
+
+class NativeCall;
+
+class NativeInstruction {
+ friend class Relocation;
+ friend bool is_NativeCallTrampolineStub_at(address);
+ public:
+ enum {
+ instruction_size = 4,
+ compressed_instruction_size = 2,
+ };
+
+ juint encoding() const {
+ return uint_at(0);
+ }
+
+ bool is_jal() const { return is_jal_at(addr_at(0)); }
+ bool is_movptr() const { return is_movptr_at(addr_at(0)); }
+ bool is_call() const { return is_call_at(addr_at(0)); }
+ bool is_jump() const { return is_jump_at(addr_at(0)); }
+
+ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; }
+ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
+ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; }
+ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
+ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; }
+ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; }
+ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; }
+ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
+ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
+ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
+ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; }
+ static bool is_slli_shift_at(address instr, uint32_t shift) {
+ assert_cond(instr != NULL);
+ return (extract_opcode(instr) == 0b0010011 && // opcode field
+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation
+ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field
+ }
+
+ static Register extract_rs1(address instr);
+ static Register extract_rs2(address instr);
+ static Register extract_rd(address instr);
+ static uint32_t extract_opcode(address instr);
+ static uint32_t extract_funct3(address instr);
+
+ // the instruction sequence of movptr is as below:
+ // lui
+ // addi
+ // slli
+ // addi
+ // slli
+ // addi/jalr/load
+ static bool check_movptr_data_dependency(address instr) {
+ address lui = instr;
+ address addi1 = lui + instruction_size;
+ address slli1 = addi1 + instruction_size;
+ address addi2 = slli1 + instruction_size;
+ address slli2 = addi2 + instruction_size;
+ address last_instr = slli2 + instruction_size;
+ return extract_rs1(addi1) == extract_rd(lui) &&
+ extract_rs1(addi1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(slli2) &&
+ extract_rs1(last_instr) == extract_rd(slli2);
+ }
+
+ // the instruction sequence of li64 is as below:
+ // lui
+ // addi
+ // slli
+ // addi
+ // slli
+ // addi
+ // slli
+ // addi
+ static bool check_li64_data_dependency(address instr) {
+ address lui = instr;
+ address addi1 = lui + instruction_size;
+ address slli1 = addi1 + instruction_size;
+ address addi2 = slli1 + instruction_size;
+ address slli2 = addi2 + instruction_size;
+ address addi3 = slli2 + instruction_size;
+ address slli3 = addi3 + instruction_size;
+ address addi4 = slli3 + instruction_size;
+ return extract_rs1(addi1) == extract_rd(lui) &&
+ extract_rs1(addi1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(slli2) &&
+ extract_rs1(addi3) == extract_rd(slli2) &&
+ extract_rs1(addi3) == extract_rd(addi3) &&
+ extract_rs1(slli3) == extract_rd(addi3) &&
+ extract_rs1(slli3) == extract_rd(slli3) &&
+ extract_rs1(addi4) == extract_rd(slli3) &&
+ extract_rs1(addi4) == extract_rd(addi4);
+ }
+
+ // the instruction sequence of li32 is as below:
+ // lui
+ // addiw
+ static bool check_li32_data_dependency(address instr) {
+ address lui = instr;
+ address addiw = lui + instruction_size;
+
+ return extract_rs1(addiw) == extract_rd(lui) &&
+ extract_rs1(addiw) == extract_rd(addiw);
+ }
+
+ // the instruction sequence of pc-relative is as below:
+ // auipc
+ // jalr/addi/load/float_load
+ static bool check_pc_relative_data_dependency(address instr) {
+ address auipc = instr;
+ address last_instr = auipc + instruction_size;
+
+ return extract_rs1(last_instr) == extract_rd(auipc);
+ }
+
+ // the instruction sequence of load_label is as below:
+ // auipc
+ // load
+ static bool check_load_pc_relative_data_dependency(address instr) {
+ address auipc = instr;
+ address load = auipc + instruction_size;
+
+ return extract_rd(load) == extract_rd(auipc) &&
+ extract_rs1(load) == extract_rd(load);
+ }
+
+ static bool is_movptr_at(address instr);
+ static bool is_li32_at(address instr);
+ static bool is_li64_at(address instr);
+ static bool is_pc_relative_at(address branch);
+ static bool is_load_pc_relative_at(address branch);
+
+ static bool is_call_at(address instr) {
+ if (is_jal_at(instr) || is_jalr_at(instr)) {
+ return true;
+ }
+ return false;
+ }
+ static bool is_lwu_to_zr(address instr);
+
+ inline bool is_nop();
+ inline bool is_jump_or_nop();
+ bool is_safepoint_poll();
+ bool is_sigill_zombie_not_entrant();
+ bool is_stop();
+
+ protected:
+ address addr_at(int offset) const { return address(this) + offset; }
+
+ jint int_at(int offset) const { return *(jint*) addr_at(offset); }
+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); }
+
+ address ptr_at(int offset) const { return *(address*) addr_at(offset); }
+
+ oop oop_at (int offset) const { return *(oop*) addr_at(offset); }
+
+ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; }
+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; }
+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; }
+ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; }
+
+ public:
+
+ inline friend NativeInstruction* nativeInstruction_at(address addr);
+
+ static bool maybe_cpool_ref(address instr) {
+ return is_auipc_at(instr);
+ }
+
+ bool is_membar() {
+ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0;
+ }
+};
+
+inline NativeInstruction* nativeInstruction_at(address addr) {
+ return (NativeInstruction*)addr;
+}
+
+// The natural type of a RISCV instruction is uint32_t
+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) {
+ return (NativeInstruction*)addr;
+}
+
+inline NativeCall* nativeCall_at(address addr);
+// The NativeCall is an abstraction for accessing/manipulating native
+// call instructions (used to manipulate inline caches, primitive &
+// DSO calls, etc.).
+
+class NativeCall: public NativeInstruction {
+ public:
+ enum RISCV_specific_constants {
+ instruction_size = 4,
+ instruction_offset = 0,
+ displacement_offset = 0,
+ return_address_offset = 4
+ };
+
+ address instruction_address() const { return addr_at(instruction_offset); }
+ address next_instruction_address() const { return addr_at(return_address_offset); }
+ address return_address() const { return addr_at(return_address_offset); }
+ address destination() const;
+
+ void set_destination(address dest) {
+ assert(is_jal(), "Should be jal instruction!");
+ intptr_t offset = (intptr_t)(dest - instruction_address());
+ assert((offset & 0x1) == 0, "bad alignment");
+ assert(is_imm_in_range(offset, 20, 1), "encoding constraint");
+ unsigned int insn = 0b1101111; // jal
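+ // J-type immediate layout (illustrative): the byte offset is scattered
+ // across the instruction by the patches below as
+ //   insn[31]    = offset[20]
+ //   insn[30:21] = offset[10:1]
+ //   insn[20]    = offset[11]
+ //   insn[19:12] = offset[19:12]
+ // e.g. an offset of 0x800 sets only insn[20], since offset[11] is the sole
+ // non-zero immediate bit.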
+ address pInsn = (address)(&insn);
+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1);
+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff);
+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1);
+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff);
+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra
+ set_int_at(displacement_offset, insn);
+ }
+
+ void verify_alignment() {} // do nothing on riscv
+ void verify();
+ void print();
+
+ // Creation
+ inline friend NativeCall* nativeCall_at(address addr);
+ inline friend NativeCall* nativeCall_before(address return_address);
+
+ static bool is_call_before(address return_address) {
+ return is_call_at(return_address - NativeCall::return_address_offset);
+ }
+
+ // MT-safe patching of a call instruction.
+ static void insert(address code_pos, address entry);
+
+ static void replace_mt_safe(address instr_addr, address code_buffer);
+
+ // Similar to replace_mt_safe, but just changes the destination. The
+ // important thing is that free-running threads are able to execute
+ // this call instruction at all times. If the call is an immediate BL
+ // this call instruction at all times. If the call is an immediate jal
+ // make sure other threads will see no intermediate states.
+
+ // We cannot rely on locks here, since the free-running threads must run at
+ // full speed.
+ //
+ // Used in the runtime linkage of calls; see class CompiledIC.
+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
+
+ // The parameter assert_lock disables the assertion during code generation.
+ void set_destination_mt_safe(address dest, bool assert_lock = true);
+
+ address get_trampoline();
+};
+
+inline NativeCall* nativeCall_at(address addr) {
+ assert_cond(addr != NULL);
+ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset);
+#ifdef ASSERT
+ call->verify();
+#endif
+ return call;
+}
+
+inline NativeCall* nativeCall_before(address return_address) {
+ assert_cond(return_address != NULL);
+ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset);
+#ifdef ASSERT
+ call->verify();
+#endif
+ return call;
+}
+
+// An interface for accessing/manipulating native mov reg, imm instructions.
+// (used to manipulate inlined 64-bit data calls, etc.)
+class NativeMovConstReg: public NativeInstruction {
+ public:
+ enum RISCV_specific_constants {
+ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr().
+ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset().
+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld
+ instruction_offset = 0,
+ displacement_offset = 0
+ };
+
+ address instruction_address() const { return addr_at(instruction_offset); }
+ address next_instruction_address() const {
+ // if the instruction at 5 * instruction_size is addi,
+ // it means a lui + addi + slli + addi + slli + addi instruction sequence,
+ // and the next instruction address should be addr_at(6 * instruction_size).
+ // However, when the instruction at 5 * instruction_size isn't addi,
+ // the next instruction address should be addr_at(5 * instruction_size)
+ if (nativeInstruction_at(instruction_address())->is_movptr()) {
+ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) {
+ // Assume: lui, addi, slli, addi, slli, addi
+ return addr_at(movptr_instruction_size);
+ } else {
+ // Assume: lui, addi, slli, addi, slli
+ return addr_at(movptr_with_offset_instruction_size);
+ }
+ } else if (is_load_pc_relative_at(instruction_address())) {
+ // Assume: auipc, ld
+ return addr_at(load_pc_relative_instruction_size);
+ }
+ guarantee(false, "Unknown instruction in NativeMovConstReg");
+ return NULL;
+ }
+
+ intptr_t data() const;
+ void set_data(intptr_t x);
+
+ void flush() {
+ if (!maybe_cpool_ref(instruction_address())) {
+ ICache::invalidate_range(instruction_address(), movptr_instruction_size);
+ }
+ }
+
+ void verify();
+ void print();
+
+ // Creation
+ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr);
+ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr);
+};
+
+inline NativeMovConstReg* nativeMovConstReg_at(address addr) {
+ assert_cond(addr != NULL);
+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset);
+#ifdef ASSERT
+ test->verify();
+#endif
+ return test;
+}
+
+inline NativeMovConstReg* nativeMovConstReg_before(address addr) {
+ assert_cond(addr != NULL);
+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset);
+#ifdef ASSERT
+ test->verify();
+#endif
+ return test;
+}
+
+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented.
+class NativeMovRegMem: public NativeInstruction {
+ public:
+ int instruction_start() const {
+ Unimplemented();
+ return 0;
+ }
+
+ address instruction_address() const {
+ Unimplemented();
+ return NULL;
+ }
+
+ int num_bytes_to_end_of_patch() const {
+ Unimplemented();
+ return 0;
+ }
+
+ int offset() const;
+
+ void set_offset(int x);
+
+ void add_offset_in_bytes(int add_offset) { Unimplemented(); }
+
+ void verify();
+ void print();
+
+ private:
+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr);
+};
+
+inline NativeMovRegMem* nativeMovRegMem_at (address addr) {
+ Unimplemented();
+ return NULL;
+}
+
+class NativeJump: public NativeInstruction {
+ public:
+ enum RISCV_specific_constants {
+ instruction_size = NativeInstruction::instruction_size,
+ instruction_offset = 0,
+ data_offset = 0,
+ next_instruction_offset = NativeInstruction::instruction_size
+ };
+
+ address instruction_address() const { return addr_at(instruction_offset); }
+ address next_instruction_address() const { return addr_at(instruction_size); }
+ address jump_destination() const;
+ void set_jump_destination(address dest);
+
+ // Creation
+ inline friend NativeJump* nativeJump_at(address addr);
+
+ void verify();
+
+ // Insertion of native jump instruction
+ static void insert(address code_pos, address entry);
+ // MT-safe insertion of native jump at verified method entry
+ static void check_verified_entry_alignment(address entry, address verified_entry);
+ static void patch_verified_entry(address entry, address verified_entry, address dest);
+};
+
+inline NativeJump* nativeJump_at(address addr) {
+ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset);
+#ifdef ASSERT
+ jump->verify();
+#endif
+ return jump;
+}
+
+class NativeGeneralJump: public NativeJump {
+public:
+ enum RISCV_specific_constants {
+ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr
+ instruction_offset = 0,
+ data_offset = 0,
+ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr
+ };
+
+ address jump_destination() const;
+
+ static void insert_unconditional(address code_pos, address entry);
+ static void replace_mt_safe(address instr_addr, address code_buffer);
+};
+
+inline NativeGeneralJump* nativeGeneralJump_at(address addr) {
+ assert_cond(addr != NULL);
+ NativeGeneralJump* jump = (NativeGeneralJump*)(addr);
+ debug_only(jump->verify();)
+ return jump;
+}
+
+class NativeIllegalInstruction: public NativeInstruction {
+ public:
+ // Insert illegal opcode at a specific address
+ static void insert(address code_pos);
+};
+
+inline bool NativeInstruction::is_nop() {
+ uint32_t insn = *(uint32_t*)addr_at(0);
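+ // 0x13 is the encoding of "addi x0, x0, 0", the canonical RISC-V NOP.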
+ return insn == 0x13;
+}
+
+inline bool NativeInstruction::is_jump_or_nop() {
+ return is_nop() || is_jump();
+}
+
+// Call trampoline stubs.
+class NativeCallTrampolineStub : public NativeInstruction {
+ public:
+
+ enum RISCV_specific_constants {
+ // Refer to function emit_trampoline_stub.
+ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address
+ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr
+ };
+
+ address destination(nmethod *nm = NULL) const;
+ void set_destination(address new_destination);
+ ptrdiff_t destination_offset() const;
+};
+
+inline bool is_NativeCallTrampolineStub_at(address addr) {
+ // Ensure that the stub is exactly
+ // ld t0, L--->auipc + ld
+ // jr t0
+ // L:
+
+ // Check the instructions, registers and immediate:
+ // 1) the instructions are auipc + ld + jalr
+ // 2) auipc[11:7] == t0, ld[11:7] == t0, ld[19:15] == t0 and jalr[19:15] == t0
+ // 3) the offset in ld[31:20] equals data_offset
+ assert_cond(addr != NULL);
+ const int instr_size = NativeInstruction::instruction_size;
+ if (NativeInstruction::is_auipc_at(addr) &&
+ NativeInstruction::is_ld_at(addr + instr_size) &&
+ NativeInstruction::is_jalr_at(addr + 2 * instr_size) &&
+ (NativeInstruction::extract_rd(addr) == x5) &&
+ (NativeInstruction::extract_rd(addr + instr_size) == x5) &&
+ (NativeInstruction::extract_rs1(addr + instr_size) == x5) &&
+ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) &&
+ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) {
+ return true;
+ }
+ return false;
+}
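+
+// As an illustrative sketch (assuming the layout produced by
+// emit_trampoline_stub), the stub matched above looks like:
+//   auipc t0, 0          // t0 = address of the stub
+//   ld    t0, 12(t0)     // load the 8-byte target stored at data_offset
+//   jr    t0             // i.e. jalr x0, t0, 0
+//   .8byte <destination> // updated via set_destination()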
+
+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) {
+ assert_cond(addr != NULL);
+ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found");
+ return (NativeCallTrampolineStub*)addr;
+}
+
+class NativeMembar : public NativeInstruction {
+public:
+ uint32_t get_kind();
+ void set_kind(uint32_t order_kind);
+};
+
+inline NativeMembar *NativeMembar_at(address addr) {
+ assert_cond(addr != NULL);
+ assert(nativeInstruction_at(addr)->is_membar(), "no membar found");
+ return (NativeMembar*)addr;
+}
+
+class NativeFenceI : public NativeInstruction {
+public:
+ static inline int instruction_size() {
+ // 2 for fence.i + fence
+ return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size;
+ }
+};
+
+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..26c1edc36ffc4c0ef65d1eb380b3b151b8026d2a
--- /dev/null
+++ b/src/hotspot/cpu/riscv/registerMap_riscv.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/registerMap.hpp"
+#include "vmreg_riscv.inline.hpp"
+
+address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const {
+ if (base_reg->is_VectorRegister()) {
+ assert(base_reg->is_concrete(), "must pass base reg");
+ int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) /
+ VectorRegisterImpl::max_slots_per_register;
+ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size;
+ address base_location = location(base_reg);
+ if (base_location != NULL) {
+ return base_location + offset_in_bytes;
+ } else {
+ return NULL;
+ }
+ } else {
+ return location(base_reg->next(slot_idx));
+ }
+}
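+
+// Illustrative example: for a spilled vector register the map records the
+// location of the base (first) slot, so a query such as
+// pd_location(v1->as_VMReg(), 2) resolves to that base location plus
+// 2 * VMRegImpl::stack_slot_size bytes; scalar registers instead fall
+// through to the per-slot location() lookup via base_reg->next(slot_idx).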
diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f34349811a9ca2cb2e27bd7afb52c83157cb2cb0
--- /dev/null
+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP
+#define CPU_RISCV_REGISTERMAP_RISCV_HPP
+
+// machine-dependent implementation for register maps
+ friend class frame;
+
+ private:
+ // This is the hook for finding a register in a "well-known" location,
+ // such as a register block of a predetermined format.
+ address pd_location(VMReg reg) const { return NULL; }
+ address pd_location(VMReg base_reg, int slot_idx) const;
+
+ // no PD state to clear or copy:
+ void pd_clear() {}
+ void pd_initialize() {}
+ void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f8116e9df8c7fa82c9213cf9c8bf591a110afddc
--- /dev/null
+++ b/src/hotspot/cpu/riscv/register_riscv.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "register_riscv.hpp"
+
+REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers);
+REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers);
+REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers);
+
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers *
+ RegisterImpl::max_slots_per_register;
+
+const int ConcreteRegisterImpl::max_fpr =
+ ConcreteRegisterImpl::max_gpr +
+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
+
+const int ConcreteRegisterImpl::max_vpr =
+ ConcreteRegisterImpl::max_fpr +
+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register;
+
+
+const char* RegisterImpl::name() const {
+ static const char *const names[number_of_registers] = {
+ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9",
+ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
+ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals",
+ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod"
+ };
+ return is_valid() ? names[encoding()] : "noreg";
+}
+
+const char* FloatRegisterImpl::name() const {
+ static const char *const names[number_of_registers] = {
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+ };
+ return is_valid() ? names[encoding()] : "noreg";
+}
+
+const char* VectorRegisterImpl::name() const {
+ static const char *const names[number_of_registers] = {
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+ };
+ return is_valid() ? names[encoding()] : "noreg";
+}
diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a9200cac647b40c3d7d8eb9d5fdb77bae738f352
--- /dev/null
+++ b/src/hotspot/cpu/riscv/register_riscv.hpp
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_REGISTER_RISCV_HPP
+#define CPU_RISCV_REGISTER_RISCV_HPP
+
+#include "asm/register.hpp"
+
+#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions.
+#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode.
+#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags).
+#define CSR_VSTART 0x008 // Vector start position
+#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag
+#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode
+#define CSR_VCSR 0x00F // Vector control and status register
+#define CSR_VL 0xC20 // Vector length
+#define CSR_VTYPE 0xC21 // Vector data type register
+#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes)
+#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction.
+#define CSR_TIME 0xc01 // Timer for RDTIME instruction.
+#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction.
+
+class VMRegImpl;
+typedef VMRegImpl* VMReg;
+
+// Use Register as shortcut
+class RegisterImpl;
+typedef const RegisterImpl* Register;
+
+inline constexpr Register as_Register(int encoding);
+
+class RegisterImpl: public AbstractRegisterImpl {
+ static constexpr Register first();
+
+ public:
+ enum {
+ number_of_registers = 32,
+ max_slots_per_register = 2,
+
+ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable
+ // for compressed instructions. See Table 17.2 in spec.
+ compressed_register_base = 8,
+ compressed_register_top = 15,
+ };
+
+ // derived registers, offsets, and addresses
+ const Register successor() const { return this + 1; }
+
+ // construction
+ inline friend constexpr Register as_Register(int encoding);
+
+ VMReg as_VMReg() const;
+
+ // accessors
+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); }
+ int encoding_nocheck() const { return this - first(); }
+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; }
+ const char* name() const;
+
+ // for rvc
+ int compressed_encoding() const {
+ assert(is_compressed_valid(), "invalid compressed register");
+ return encoding() - compressed_register_base;
+ }
+
+ int compressed_encoding_nocheck() const {
+ return encoding_nocheck() - compressed_register_base;
+ }
+
+ bool is_compressed_valid() const {
+ return encoding_nocheck() >= compressed_register_base &&
+ encoding_nocheck() <= compressed_register_top;
+ }
+};
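+
+// Illustrative note on the RVC helpers above: compressed (16-bit) encodings
+// address x8..x15 with a 3-bit register field, so compressed_encoding() maps
+// x8 -> 0, x9 -> 1, ..., x15 -> 7, and is_compressed_valid() rejects any
+// register outside that window.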
+
+REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers);
+
+// The integer registers of the RISCV architecture
+
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+
+CONSTANT_REGISTER_DECLARATION(Register, x0, (0));
+CONSTANT_REGISTER_DECLARATION(Register, x1, (1));
+CONSTANT_REGISTER_DECLARATION(Register, x2, (2));
+CONSTANT_REGISTER_DECLARATION(Register, x3, (3));
+CONSTANT_REGISTER_DECLARATION(Register, x4, (4));
+CONSTANT_REGISTER_DECLARATION(Register, x5, (5));
+CONSTANT_REGISTER_DECLARATION(Register, x6, (6));
+CONSTANT_REGISTER_DECLARATION(Register, x7, (7));
+CONSTANT_REGISTER_DECLARATION(Register, x8, (8));
+CONSTANT_REGISTER_DECLARATION(Register, x9, (9));
+CONSTANT_REGISTER_DECLARATION(Register, x10, (10));
+CONSTANT_REGISTER_DECLARATION(Register, x11, (11));
+CONSTANT_REGISTER_DECLARATION(Register, x12, (12));
+CONSTANT_REGISTER_DECLARATION(Register, x13, (13));
+CONSTANT_REGISTER_DECLARATION(Register, x14, (14));
+CONSTANT_REGISTER_DECLARATION(Register, x15, (15));
+CONSTANT_REGISTER_DECLARATION(Register, x16, (16));
+CONSTANT_REGISTER_DECLARATION(Register, x17, (17));
+CONSTANT_REGISTER_DECLARATION(Register, x18, (18));
+CONSTANT_REGISTER_DECLARATION(Register, x19, (19));
+CONSTANT_REGISTER_DECLARATION(Register, x20, (20));
+CONSTANT_REGISTER_DECLARATION(Register, x21, (21));
+CONSTANT_REGISTER_DECLARATION(Register, x22, (22));
+CONSTANT_REGISTER_DECLARATION(Register, x23, (23));
+CONSTANT_REGISTER_DECLARATION(Register, x24, (24));
+CONSTANT_REGISTER_DECLARATION(Register, x25, (25));
+CONSTANT_REGISTER_DECLARATION(Register, x26, (26));
+CONSTANT_REGISTER_DECLARATION(Register, x27, (27));
+CONSTANT_REGISTER_DECLARATION(Register, x28, (28));
+CONSTANT_REGISTER_DECLARATION(Register, x29, (29));
+CONSTANT_REGISTER_DECLARATION(Register, x30, (30));
+CONSTANT_REGISTER_DECLARATION(Register, x31, (31));
+
+// Use FloatRegister as shortcut
+class FloatRegisterImpl;
+typedef const FloatRegisterImpl* FloatRegister;
+
+inline constexpr FloatRegister as_FloatRegister(int encoding);
+
+// The implementation of floating point registers for the architecture
+class FloatRegisterImpl: public AbstractRegisterImpl {
+ static constexpr FloatRegister first();
+
+ public:
+ enum {
+ number_of_registers = 32,
+ max_slots_per_register = 2,
+
+ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec.
+ compressed_register_base = 8,
+ compressed_register_top = 15,
+ };
+
+ // construction
+ inline friend constexpr FloatRegister as_FloatRegister(int encoding);
+
+ VMReg as_VMReg() const;
+
+ // derived registers, offsets, and addresses
+ FloatRegister successor() const {
+ return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers);
+ }
+
+ // accessors
+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); }
+ int encoding_nocheck() const { return this - first(); }
+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; }
+ const char* name() const;
+
+ // for rvc
+ int compressed_encoding() const {
+ assert(is_compressed_valid(), "invalid compressed register");
+ return encoding() - compressed_register_base;
+ }
+
+ int compressed_encoding_nocheck() const {
+ return encoding_nocheck() - compressed_register_base;
+ }
+
+ bool is_compressed_valid() const {
+ return encoding_nocheck() >= compressed_register_base &&
+ encoding_nocheck() <= compressed_register_top;
+ }
+};
+
+REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers);
+
+// The float registers of the RISCV architecture
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1));
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31));
+
+// Use VectorRegister as shortcut
+class VectorRegisterImpl;
+typedef const VectorRegisterImpl* VectorRegister;
+
+inline constexpr VectorRegister as_VectorRegister(int encoding);
+
+// The implementation of vector registers for RVV
+class VectorRegisterImpl: public AbstractRegisterImpl {
+ static constexpr VectorRegister first();
+
+ public:
+ enum {
+ number_of_registers = 32,
+ max_slots_per_register = 4
+ };
+
+ // construction
+ inline friend constexpr VectorRegister as_VectorRegister(int encoding);
+
+ VMReg as_VMReg() const;
+
+ // derived registers, offsets, and addresses
+ VectorRegister successor() const { return this + 1; }
+
+ // accessors
+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); }
+ int encoding_nocheck() const { return this - first(); }
+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; }
+ const char* name() const;
+
+};
+
+REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers);
+
+// The vector registers of RVV
+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1));
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31));
+
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+ enum {
+ // A big enough number for C2: all the registers plus flags
+ // This number must be large enough to cover REG_COUNT (defined by c2) registers.
+ // There is no requirement that any ordering here matches any ordering c2 gives
+ // its optoregs.
+
+ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
+ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers)
+ };
+
+ // added to make it compile
+ static const int max_gpr;
+ static const int max_fpr;
+ static const int max_vpr;
+};
+
+typedef AbstractRegSet<Register> RegSet;
+typedef AbstractRegSet<FloatRegister> FloatRegSet;
+typedef AbstractRegSet<VectorRegister> VectorRegSet;
+
+#endif // CPU_RISCV_REGISTER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..228a64eae2c6491ff3c03f01da14de82de22d89c
--- /dev/null
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "code/relocInfo.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
+ if (verify_only) {
+ return;
+ }
+
+ int bytes;
+
+ switch (type()) {
+ case relocInfo::oop_type: {
+ oop_Relocation *reloc = (oop_Relocation *)this;
+ // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate
+ if (NativeInstruction::is_load_pc_relative_at(addr())) {
+ address constptr = (address)code()->oop_addr_at(reloc->oop_index());
+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr);
+ assert(*(address*)constptr == x, "error in oop relocation");
+ } else {
+ bytes = MacroAssembler::patch_oop(addr(), x);
+ }
+ break;
+ }
+ default:
+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
+ break;
+ }
+ ICache::invalidate_range(addr(), bytes);
+}
+
+address Relocation::pd_call_destination(address orig_addr) {
+ assert(is_call(), "should be an address instruction here");
+ if (NativeCall::is_call_at(addr())) {
+ address trampoline = nativeCall_at(addr())->get_trampoline();
+ if (trampoline != NULL) {
+ return nativeCallTrampolineStub_at(trampoline)->destination();
+ }
+ }
+ if (orig_addr != NULL) {
+ // the extracted address from the instructions in address orig_addr
+ address new_addr = MacroAssembler::pd_call_destination(orig_addr);
+ // If call is branch to self, don't try to relocate it, just leave it
+ // as branch to self. This happens during code generation if the code
+ // buffer expands. It will be relocated to the trampoline above once
+ // code generation is complete.
+ new_addr = (new_addr == orig_addr) ? addr() : new_addr;
+ return new_addr;
+ }
+ return MacroAssembler::pd_call_destination(addr());
+}
+
+void Relocation::pd_set_call_destination(address x) {
+ assert(is_call(), "should be an address instruction here");
+ if (NativeCall::is_call_at(addr())) {
+ address trampoline = nativeCall_at(addr())->get_trampoline();
+ if (trampoline != NULL) {
+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false);
+ return;
+ }
+ }
+ MacroAssembler::pd_patch_instruction_size(addr(), x);
+ address pd_call = pd_call_destination(addr());
+ assert(pd_call == x, "fail in reloc");
+}
+
+address* Relocation::pd_address_in_code() {
+ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!");
+ return (address*)(MacroAssembler::target_addr_for_insn(addr()));
+}
+
+address Relocation::pd_get_address_from_code() {
+ return MacroAssembler::pd_call_destination(addr());
+}
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+ if (NativeInstruction::maybe_cpool_ref(addr())) {
+ address old_addr = old_addr_for(addr(), src, dest);
+ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr));
+ }
+}
+
+void metadata_Relocation::pd_fix_value(address x) {
+}
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..840ed935d88b74c2462b5f96f8bbbd6d9cdd9d6b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP
+#define CPU_RISCV_RELOCINFO_RISCV_HPP
+
+ // machine-dependent parts of class relocInfo
+ private:
+ enum {
+ // Relocations are byte-aligned.
+ offset_unit = 1,
+ // Must be at least 1 for RelocInfo::narrow_oop_in_const.
+ format_width = 1
+ };
+
+ public:
+
+ // This platform has no oops in the code that are not also
+ // listed in the oop section.
+ static bool mustIterateImmediateOopsInCode() { return false; }
+
+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
new file mode 100644
index 0000000000000000000000000000000000000000..d06991b0854fc202bba0391ca185db976a024080
--- /dev/null
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -0,0 +1,10611 @@
+//
+// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// RISCV Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def" name ( register save type, C convention save type,
+// ideal register type, encoding );
+// Register Save Types:
+//
+// NS = No-Save: The register allocator assumes that these registers
+// can be used without saving upon entry to the method, &
+// that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call: The register allocator assumes that these registers
+// can be used without saving upon entry to the method,
+// but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, but they do not need to be saved at call
+// sites.
+//
+// AS = Always-Save: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// We must define the 64 bit int registers in two 32 bit halves, the
+// real lower register and a virtual upper half register. Upper halves
+// are used by the register allocator but are not actually supplied as
+// operands to memory ops.
+//
+// follow the C1 compiler in making registers
+//
+// x7, x9-x17, x27-x31 volatile (caller save)
+// x0-x4, x8, x23 system (no save, no allocate)
+// x5-x6 non-allocatable (so we can use them as temporary regs)
+
+//
+// As regards Java usage, we don't use any callee-save registers,
+// because this makes it difficult to de-optimise a frame (see the comment
+// in the x86 implementation of Deoptimization::unwind_callee_save_values).
+//
+
+// General Registers
+
+reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr
+reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() );
+reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra
+reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() );
+reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp
+reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() );
+reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp
+reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() );
+reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp
+reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() );
+reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() );
+reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() );
+reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp
+reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() );
+reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() );
+reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() );
+reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() );
+reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next());
+reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() );
+reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next());
+reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() );
+reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next());
+reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() );
+reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next());
+reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() );
+reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next());
+reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() );
+reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next());
+reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() );
+reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next());
+reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() );
+reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next());
+reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() );
+reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next());
+reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() );
+reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next());
+reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp
+reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next());
+reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() );
+reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next());
+reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() );
+reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next());
+reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread
+reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next());
+reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() );
+reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next());
+reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() );
+reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next());
+reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() );
+reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next());
+reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase
+reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next());
+reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() );
+reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next());
+reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() );
+reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next());
+reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() );
+reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next());
+reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() );
+reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next());
+
+// ----------------------------
+// Float/Double Registers
+// ----------------------------
+
+// Double Registers
+
+// The rules of ADL require that double registers be defined in pairs.
+// Each pair must be two 32-bit values, but not necessarily a pair of
+// single float registers. In each pair, ADLC-assigned register numbers
+// must be adjacent, with the lower number even. Finally, when the
+// CPU stores such a register pair to memory, the word associated with
+// the lower ADLC-assigned number must be stored to the lower address.
+
+// RISCV has 32 floating-point registers. Each can store a single
+// or double precision floating-point value.
+
+// For Java use, float registers f0-f31 are always save-on-call, whereas
+// the platform ABI treats f8-f9 and f18-f27 as callee-save. Other
+// float registers are SOC as per the platform spec.
+
+reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() );
+reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() );
+reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() );
+reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() );
+reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() );
+reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() );
+reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() );
+reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() );
+reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() );
+reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() );
+reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() );
+reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() );
+reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() );
+reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() );
+reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() );
+reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() );
+reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() );
+reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() );
+reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() );
+reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() );
+reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() );
+reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() );
+reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() );
+reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() );
+reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() );
+reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() );
+reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() );
+reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() );
+reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() );
+reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() );
+reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() );
+reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() );
+reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() );
+reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() );
+reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() );
+reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() );
+reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() );
+reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() );
+reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() );
+reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() );
+reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() );
+reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() );
+reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() );
+reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() );
+reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() );
+reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() );
+reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() );
+reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() );
+reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() );
+reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() );
+reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() );
+reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() );
+reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() );
+reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() );
+reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() );
+reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() );
+reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() );
+reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() );
+reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() );
+reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() );
+reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() );
+reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() );
+reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() );
+reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() );
+
+// ----------------------------
+// Vector Registers
+// ----------------------------
+
+// For RVV vector registers, we simply extend vector register size to 4
+// 'logical' slots. This is nominally 128 bits but it actually covers
+// all possible 'physical' RVV vector register lengths from 128 ~ 1024
+// bits. The 'physical' RVV vector register length is detected during
+// startup, so the register allocator is able to identify the correct
+// number of bytes needed for an RVV spill/unspill.
+
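+// For example (not tied to any particular hardware), an implementation whose
+// VLENB CSR reads 32 (a 256-bit VLEN) still exposes only the 4 logical slots
+// defined below, while the spill/unspill code generated at runtime moves the
+// full 32 bytes per vector register.
+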
+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() );
+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() );
+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) );
+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) );
+
+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() );
+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() );
+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) );
+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) );
+
+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() );
+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() );
+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) );
+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) );
+
+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() );
+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() );
+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) );
+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) );
+
+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() );
+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() );
+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) );
+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) );
+
+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() );
+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() );
+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) );
+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) );
+
+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() );
+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() );
+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) );
+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) );
+
+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() );
+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() );
+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) );
+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) );
+
+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() );
+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() );
+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) );
+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) );
+
+reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() );
+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() );
+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) );
+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) );
+
+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() );
+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() );
+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) );
+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) );
+
+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() );
+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() );
+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) );
+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) );
+
+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() );
+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() );
+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) );
+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) );
+
+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() );
+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() );
+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) );
+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) );
+
+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() );
+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() );
+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) );
+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) );
+
+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() );
+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() );
+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) );
+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) );
+
+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() );
+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() );
+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) );
+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) );
+
+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() );
+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() );
+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) );
+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) );
+
+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() );
+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() );
+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) );
+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) );
+
+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() );
+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() );
+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) );
+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) );
+
+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() );
+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() );
+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) );
+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) );
+
+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() );
+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() );
+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) );
+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) );
+
+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() );
+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() );
+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) );
+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) );
+
+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() );
+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() );
+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) );
+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) );
+
+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() );
+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() );
+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) );
+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) );
+
+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() );
+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() );
+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) );
+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) );
+
+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() );
+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() );
+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) );
+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) );
+
+reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() );
+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() );
+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) );
+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) );
+
+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() );
+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() );
+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) );
+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) );
+
+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() );
+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() );
+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) );
+reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) );
+
+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() );
+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() );
+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) );
+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) );
+
+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() );
+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() );
+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) );
+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) );
+
+// ----------------------------
+// Special Registers
+// ----------------------------
+
+// On riscv, there is no physical flag register, so we use t1 instead
+// to bridge the RegFlags semantics in share/opto
+
+reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() );
+
+// Specify priority of register selection within phases of register
+// allocation. Highest priority is first. A useful heuristic is to
+// give registers a low priority when they are required by machine
+// instructions, like EAX and EDX on I486, and choose no-save registers
+// before save-on-call, & save-on-call before save-on-entry. Registers
+// which participate in fixed calling sequences should come last.
+// Registers which are used as pairs must fall on an even boundary.
+
+alloc_class chunk0(
+ // volatiles
+ R7, R7_H,
+ R28, R28_H,
+ R29, R29_H,
+ R30, R30_H,
+ R31, R31_H,
+
+ // arg registers
+ R10, R10_H,
+ R11, R11_H,
+ R12, R12_H,
+ R13, R13_H,
+ R14, R14_H,
+ R15, R15_H,
+ R16, R16_H,
+ R17, R17_H,
+
+ // non-volatiles
+ R9, R9_H,
+ R18, R18_H,
+ R19, R19_H,
+ R20, R20_H,
+ R21, R21_H,
+ R22, R22_H,
+ R24, R24_H,
+ R25, R25_H,
+ R26, R26_H,
+
+ // non-allocatable registers
+ R23, R23_H, // java thread
+ R27, R27_H, // heapbase
+ R4, R4_H, // thread
+ R8, R8_H, // fp
+ R0, R0_H, // zero
+ R1, R1_H, // ra
+ R2, R2_H, // sp
+ R3, R3_H, // gp
+);
+
+alloc_class chunk1(
+
+ // no save
+ F0, F0_H,
+ F1, F1_H,
+ F2, F2_H,
+ F3, F3_H,
+ F4, F4_H,
+ F5, F5_H,
+ F6, F6_H,
+ F7, F7_H,
+ F28, F28_H,
+ F29, F29_H,
+ F30, F30_H,
+ F31, F31_H,
+
+ // arg registers
+ F10, F10_H,
+ F11, F11_H,
+ F12, F12_H,
+ F13, F13_H,
+ F14, F14_H,
+ F15, F15_H,
+ F16, F16_H,
+ F17, F17_H,
+
+ // non-volatiles
+ F8, F8_H,
+ F9, F9_H,
+ F18, F18_H,
+ F19, F19_H,
+ F20, F20_H,
+ F21, F21_H,
+ F22, F22_H,
+ F23, F23_H,
+ F24, F24_H,
+ F25, F25_H,
+ F26, F26_H,
+ F27, F27_H,
+);
+
+alloc_class chunk2(
+ V0, V0_H, V0_J, V0_K,
+ V1, V1_H, V1_J, V1_K,
+ V2, V2_H, V2_J, V2_K,
+ V3, V3_H, V3_J, V3_K,
+ V4, V4_H, V4_J, V4_K,
+ V5, V5_H, V5_J, V5_K,
+ V6, V6_H, V6_J, V6_K,
+ V7, V7_H, V7_J, V7_K,
+ V8, V8_H, V8_J, V8_K,
+ V9, V9_H, V9_J, V9_K,
+ V10, V10_H, V10_J, V10_K,
+ V11, V11_H, V11_J, V11_K,
+ V12, V12_H, V12_J, V12_K,
+ V13, V13_H, V13_J, V13_K,
+ V14, V14_H, V14_J, V14_K,
+ V15, V15_H, V15_J, V15_K,
+ V16, V16_H, V16_J, V16_K,
+ V17, V17_H, V17_J, V17_K,
+ V18, V18_H, V18_J, V18_K,
+ V19, V19_H, V19_J, V19_K,
+ V20, V20_H, V20_J, V20_K,
+ V21, V21_H, V21_J, V21_K,
+ V22, V22_H, V22_J, V22_K,
+ V23, V23_H, V23_J, V23_K,
+ V24, V24_H, V24_J, V24_K,
+ V25, V25_H, V25_J, V25_K,
+ V26, V26_H, V26_J, V26_K,
+ V27, V27_H, V27_J, V27_K,
+ V28, V28_H, V28_J, V28_K,
+ V29, V29_H, V29_J, V29_K,
+ V30, V30_H, V30_J, V30_K,
+ V31, V31_H, V31_J, V31_K,
+);
+
+alloc_class chunk3(RFLAGS);
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+
+// Class for all 32 bit general purpose registers
+reg_class all_reg32(
+ R0,
+ R1,
+ R2,
+ R3,
+ R4,
+ R7,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+ R16,
+ R17,
+ R18,
+ R19,
+ R20,
+ R21,
+ R22,
+ R23,
+ R24,
+ R25,
+ R26,
+ R27,
+ R28,
+ R29,
+ R30,
+ R31
+);
+
+// Class for any 32 bit integer registers (excluding zr)
+reg_class any_reg32 %{
+ return _ANY_REG32_mask;
+%}
+
+// Singleton class for R10 int register
+reg_class int_r10_reg(R10);
+
+// Singleton class for R12 int register
+reg_class int_r12_reg(R12);
+
+// Singleton class for R13 int register
+reg_class int_r13_reg(R13);
+
+// Singleton class for R14 int register
+reg_class int_r14_reg(R14);
+
+// Class for all long integer registers
+reg_class all_reg(
+ R0, R0_H,
+ R1, R1_H,
+ R2, R2_H,
+ R3, R3_H,
+ R4, R4_H,
+ R7, R7_H,
+ R8, R8_H,
+ R9, R9_H,
+ R10, R10_H,
+ R11, R11_H,
+ R12, R12_H,
+ R13, R13_H,
+ R14, R14_H,
+ R15, R15_H,
+ R16, R16_H,
+ R17, R17_H,
+ R18, R18_H,
+ R19, R19_H,
+ R20, R20_H,
+ R21, R21_H,
+ R22, R22_H,
+ R23, R23_H,
+ R24, R24_H,
+ R25, R25_H,
+ R26, R26_H,
+ R27, R27_H,
+ R28, R28_H,
+ R29, R29_H,
+ R30, R30_H,
+ R31, R31_H
+);
+
+// Class for all long integer registers (excluding zr)
+reg_class any_reg %{
+ return _ANY_REG_mask;
+%}
+
+// Class for non-allocatable 32 bit registers
+reg_class non_allocatable_reg32(
+ R0, // zr
+ R1, // ra
+ R2, // sp
+ R3, // gp
+ R4, // tp
+ R23 // java thread
+);
+
+// Class for non-allocatable 64 bit registers
+reg_class non_allocatable_reg(
+ R0, R0_H, // zr
+ R1, R1_H, // ra
+ R2, R2_H, // sp
+ R3, R3_H, // gp
+ R4, R4_H, // tp
+ R23, R23_H // java thread
+);
+
+reg_class no_special_reg32 %{
+ return _NO_SPECIAL_REG32_mask;
+%}
+
+reg_class no_special_reg %{
+ return _NO_SPECIAL_REG_mask;
+%}
+
+reg_class ptr_reg %{
+ return _PTR_REG_mask;
+%}
+
+reg_class no_special_ptr_reg %{
+ return _NO_SPECIAL_PTR_REG_mask;
+%}
+
+// Class for 64 bit register r10
+reg_class r10_reg(
+ R10, R10_H
+);
+
+// Class for 64 bit register r11
+reg_class r11_reg(
+ R11, R11_H
+);
+
+// Class for 64 bit register r12
+reg_class r12_reg(
+ R12, R12_H
+);
+
+// Class for 64 bit register r13
+reg_class r13_reg(
+ R13, R13_H
+);
+
+// Class for 64 bit register r14
+reg_class r14_reg(
+ R14, R14_H
+);
+
+// Class for 64 bit register r15
+reg_class r15_reg(
+ R15, R15_H
+);
+
+// Class for 64 bit register r16
+reg_class r16_reg(
+ R16, R16_H
+);
+
+// Class for method register
+reg_class method_reg(
+ R31, R31_H
+);
+
+// Class for heapbase register
+reg_class heapbase_reg(
+ R27, R27_H
+);
+
+// Class for java thread register
+reg_class java_thread_reg(
+ R23, R23_H
+);
+
+reg_class r28_reg(
+ R28, R28_H
+);
+
+reg_class r29_reg(
+ R29, R29_H
+);
+
+reg_class r30_reg(
+ R30, R30_H
+);
+
+// Class for zero register
+reg_class zr_reg(
+ R0, R0_H
+);
+
+// Class for thread register
+reg_class thread_reg(
+ R4, R4_H
+);
+
+// Class for frame pointer register
+reg_class fp_reg(
+ R8, R8_H
+);
+
+// Class for link register
+reg_class ra_reg(
+ R1, R1_H
+);
+
+// Class for long sp register
+reg_class sp_reg(
+ R2, R2_H
+);
+
+// Class for all float registers
+reg_class float_reg(
+ F0,
+ F1,
+ F2,
+ F3,
+ F4,
+ F5,
+ F6,
+ F7,
+ F8,
+ F9,
+ F10,
+ F11,
+ F12,
+ F13,
+ F14,
+ F15,
+ F16,
+ F17,
+ F18,
+ F19,
+ F20,
+ F21,
+ F22,
+ F23,
+ F24,
+ F25,
+ F26,
+ F27,
+ F28,
+ F29,
+ F30,
+ F31
+);
+
+// Double precision float registers have virtual `high halves' that
+// are needed by the allocator.
+// Class for all double registers
+reg_class double_reg(
+ F0, F0_H,
+ F1, F1_H,
+ F2, F2_H,
+ F3, F3_H,
+ F4, F4_H,
+ F5, F5_H,
+ F6, F6_H,
+ F7, F7_H,
+ F8, F8_H,
+ F9, F9_H,
+ F10, F10_H,
+ F11, F11_H,
+ F12, F12_H,
+ F13, F13_H,
+ F14, F14_H,
+ F15, F15_H,
+ F16, F16_H,
+ F17, F17_H,
+ F18, F18_H,
+ F19, F19_H,
+ F20, F20_H,
+ F21, F21_H,
+ F22, F22_H,
+ F23, F23_H,
+ F24, F24_H,
+ F25, F25_H,
+ F26, F26_H,
+ F27, F27_H,
+ F28, F28_H,
+ F29, F29_H,
+ F30, F30_H,
+ F31, F31_H
+);
+
+// Class for all RVV vector registers
+reg_class vectora_reg(
+ V1, V1_H, V1_J, V1_K,
+ V2, V2_H, V2_J, V2_K,
+ V3, V3_H, V3_J, V3_K,
+ V4, V4_H, V4_J, V4_K,
+ V5, V5_H, V5_J, V5_K,
+ V6, V6_H, V6_J, V6_K,
+ V7, V7_H, V7_J, V7_K,
+ V8, V8_H, V8_J, V8_K,
+ V9, V9_H, V9_J, V9_K,
+ V10, V10_H, V10_J, V10_K,
+ V11, V11_H, V11_J, V11_K,
+ V12, V12_H, V12_J, V12_K,
+ V13, V13_H, V13_J, V13_K,
+ V14, V14_H, V14_J, V14_K,
+ V15, V15_H, V15_J, V15_K,
+ V16, V16_H, V16_J, V16_K,
+ V17, V17_H, V17_J, V17_K,
+ V18, V18_H, V18_J, V18_K,
+ V19, V19_H, V19_J, V19_K,
+ V20, V20_H, V20_J, V20_K,
+ V21, V21_H, V21_J, V21_K,
+ V22, V22_H, V22_J, V22_K,
+ V23, V23_H, V23_J, V23_K,
+ V24, V24_H, V24_J, V24_K,
+ V25, V25_H, V25_J, V25_K,
+ V26, V26_H, V26_J, V26_K,
+ V27, V27_H, V27_J, V27_K,
+ V28, V28_H, V28_J, V28_K,
+ V29, V29_H, V29_J, V29_K,
+ V30, V30_H, V30_J, V30_K,
+ V31, V31_H, V31_J, V31_K
+);
+
+// Class for 64 bit register f0
+reg_class f0_reg(
+ F0, F0_H
+);
+
+// Class for 64 bit register f1
+reg_class f1_reg(
+ F1, F1_H
+);
+
+// Class for 64 bit register f2
+reg_class f2_reg(
+ F2, F2_H
+);
+
+// Class for 64 bit register f3
+reg_class f3_reg(
+ F3, F3_H
+);
+
+// class for vector register v1
+reg_class v1_reg(
+ V1, V1_H, V1_J, V1_K
+);
+
+// class for vector register v2
+reg_class v2_reg(
+ V2, V2_H, V2_J, V2_K
+);
+
+// class for vector register v3
+reg_class v3_reg(
+ V3, V3_H, V3_J, V3_K
+);
+
+// class for vector register v4
+reg_class v4_reg(
+ V4, V4_H, V4_J, V4_K
+);
+
+// class for vector register v5
+reg_class v5_reg(
+ V5, V5_H, V5_J, V5_K
+);
+
+// class for condition codes
+reg_class reg_flags(RFLAGS);
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//   int_def <name> ( <int_value>, <expression> );
+// Generated Code in ad_<arch>.hpp
+//   #define <name> (<expression>)
+//   // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//   assert(<name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+
+// We follow the ppc-aix port in using a simple cost model which ranks
+// register operations as cheap, memory ops as more expensive and
+// branches as most expensive. The first two have a low as well as a
+// normal cost. A huge cost appears to be a way of saying "don't do
+// this at all".
+
+definitions %{
+ // The default cost (of a register move instruction).
+ int_def DEFAULT_COST ( 100, 100);
+ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt,
+ // multi, auipc, nop, logical, move
+ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload
+ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore
+ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp
+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call
+ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul
+ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivsi
+ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivdi
+ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd
+ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd
+ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv
+ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt
+ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST);
+%}
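+
+// A minimal sketch of how these cost constants are consumed (hypothetical
+// rule shown for illustration only; the real instruct definitions appear
+// later in this file). Each instruct rule charges one of the constants above
+// through its ins_cost clause:
+//
+//   instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+//     match(Set dst (AddI src1 src2));
+//     ins_cost(ALU_COST);   // register-register arithmetic is the cheap case
+//     format %{ "addw  $dst, $src1, $src2" %}
+//     ins_encode %{ __ addw($dst$$Register, $src1$$Register, $src2$$Register); %}
+//     ins_pipe(ialu_reg_reg);
+//   %}
+//
+// A load rule would instead charge LOAD_COST, and a branch BRANCH_COST.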
+
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "opto/addnode.hpp"
+#include "opto/convertnode.hpp"
+
+extern RegMask _ANY_REG32_mask;
+extern RegMask _ANY_REG_mask;
+extern RegMask _PTR_REG_mask;
+extern RegMask _NO_SPECIAL_REG32_mask;
+extern RegMask _NO_SPECIAL_REG_mask;
+extern RegMask _NO_SPECIAL_PTR_REG_mask;
+
+class CallStubImpl {
+
+ //--------------------------------------------------------------
+ //---< Used for optimization in Compile::shorten_branches >---
+ //--------------------------------------------------------------
+
+ public:
+ // Size of call trampoline stub.
+ static uint size_call_trampoline() {
+ return 0; // no call trampolines on this platform
+ }
+
+ // number of relocations needed by a call trampoline stub
+ static uint reloc_call_trampoline() {
+ return 0; // no call trampolines on this platform
+ }
+};
+
+class HandlerImpl {
+
+ public:
+
+ static int emit_exception_handler(CodeBuffer &cbuf);
+ static int emit_deopt_handler(CodeBuffer& cbuf);
+
+ static uint size_exception_handler() {
+ return MacroAssembler::far_branch_size();
+ }
+
+ static uint size_deopt_handler() {
+ // count auipc + far branch
+ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
+ }
+};
+
+class Node::PD {
+public:
+ enum NodeFlags {
+ _last_flag = Node::_last_flag
+ };
+};
+
+bool is_CAS(int opcode, bool maybe_volatile);
+
+// predicate controlling translation of CompareAndSwapX
+bool needs_acquiring_load_reserved(const Node *load);
+
+// predicate controlling addressing modes
+bool size_fits_all_mem_uses(AddPNode* addp, int shift);
+%}
+
+source %{
+
+// Derived RegMask with conditionally allocatable registers
+
+RegMask _ANY_REG32_mask;
+RegMask _ANY_REG_mask;
+RegMask _PTR_REG_mask;
+RegMask _NO_SPECIAL_REG32_mask;
+RegMask _NO_SPECIAL_REG_mask;
+RegMask _NO_SPECIAL_PTR_REG_mask;
+
+void reg_mask_init() {
+
+ _ANY_REG32_mask = _ALL_REG32_mask;
+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg()));
+
+ _ANY_REG_mask = _ALL_REG_mask;
+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask);
+
+ _PTR_REG_mask = _ALL_REG_mask;
+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask);
+
+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
+
+ _NO_SPECIAL_REG_mask = _ALL_REG_mask;
+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
+
+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
+
+ // x27 is not allocatable when compressed oops is on
+ if (UseCompressedOops) {
+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg()));
+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
+ }
+
+ // x8 is not allocatable when PreserveFramePointer is on
+ if (PreserveFramePointer) {
+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask);
+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask);
+ }
+}
+
+void PhaseOutput::pd_perform_mach_node_analysis() {
+}
+
+int MachNode::pd_alignment_required() const {
+ return 1;
+}
+
+int MachNode::compute_padding(int current_offset) const {
+ return 0;
+}
+
+// is_CAS(int opcode, bool maybe_volatile)
+//
+// Return true if opcode is one of the possible CompareAndSwapX
+// values, otherwise false.
+bool is_CAS(int opcode, bool maybe_volatile)
+{
+ switch (opcode) {
+ // We handle these
+ case Op_CompareAndSwapI:
+ case Op_CompareAndSwapL:
+ case Op_CompareAndSwapP:
+ case Op_CompareAndSwapN:
+ case Op_ShenandoahCompareAndSwapP:
+ case Op_ShenandoahCompareAndSwapN:
+ case Op_CompareAndSwapB:
+ case Op_CompareAndSwapS:
+ case Op_GetAndSetI:
+ case Op_GetAndSetL:
+ case Op_GetAndSetP:
+ case Op_GetAndSetN:
+ case Op_GetAndAddI:
+ case Op_GetAndAddL:
+ return true;
+ case Op_CompareAndExchangeI:
+ case Op_CompareAndExchangeN:
+ case Op_CompareAndExchangeB:
+ case Op_CompareAndExchangeS:
+ case Op_CompareAndExchangeL:
+ case Op_CompareAndExchangeP:
+ case Op_WeakCompareAndSwapB:
+ case Op_WeakCompareAndSwapS:
+ case Op_WeakCompareAndSwapI:
+ case Op_WeakCompareAndSwapL:
+ case Op_WeakCompareAndSwapP:
+ case Op_WeakCompareAndSwapN:
+ case Op_ShenandoahWeakCompareAndSwapP:
+ case Op_ShenandoahWeakCompareAndSwapN:
+ case Op_ShenandoahCompareAndExchangeP:
+ case Op_ShenandoahCompareAndExchangeN:
+ return maybe_volatile;
+ default:
+ return false;
+ }
+}
+
+// predicate controlling translation of CAS
+//
+// returns true if CAS needs to use an acquiring load otherwise false
+bool needs_acquiring_load_reserved(const Node *n)
+{
+ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap");
+
+ LoadStoreNode* ldst = n->as_LoadStore();
+ if (n != NULL && is_CAS(n->Opcode(), false)) {
+ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar");
+ } else {
+ return ldst != NULL && ldst->trailing_membar() != NULL;
+ }
+ // a strong CAS is guaranteed to have a trailing membar (asserted above),
+ // so we can just return true here
+ return true;
+}
+#define __ _masm.
+
+// advance declarations for helper functions to convert register
+// indices to register objects
+
+// the ad file has to provide implementations of certain methods
+// expected by the generic code
+//
+// REQUIRED FUNCTIONALITY
+
+//=============================================================================
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+// from the start of the call to the point where the return address
+// will point.
+
+int MachCallStaticJavaNode::ret_addr_offset()
+{
+ // jal
+ return 1 * NativeInstruction::instruction_size;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset()
+{
+ return 7 * NativeInstruction::instruction_size; // movptr, jal
+}
+
+int MachCallRuntimeNode::ret_addr_offset() {
+ // for generated stubs the call will be
+ // jal(addr)
+ // or with far branches
+ // jal(trampoline_stub)
+ // for real runtime callouts it will be 11 instructions
+ // see riscv_enc_java_to_runtime
+ // la(t1, retaddr) -> auipc + addi
+ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi
+ // addi(sp, sp, -2 * wordSize) -> addi
+ // sd(t1, Address(sp, wordSize)) -> sd
+ // jalr(t0) -> jalr
+ CodeBlob *cb = CodeCache::find_blob(_entry_point);
+ if (cb != NULL) {
+ return 1 * NativeInstruction::instruction_size;
+ } else {
+ return 11 * NativeInstruction::instruction_size;
+ }
+}
+
+int MachCallNativeNode::ret_addr_offset() {
+ Unimplemented();
+ return -1;
+}
+
+//
+// Compute padding required for nodes which need alignment
+//
+
+// With RVC a call instruction may get 2-byte aligned.
+// The address of the call instruction needs to be 4-byte aligned to
+// ensure that it does not span a cache line so that it can be patched.
+int CallStaticJavaDirectNode::compute_padding(int current_offset) const
+{
+ // to make sure the address of jal 4-byte aligned.
+ return align_up(current_offset, alignment_required()) - current_offset;
+}
+
+// With RVC a call instruction may get 2-byte aligned.
+// The address of the call instruction needs to be 4-byte aligned to
+// ensure that it does not span a cache line so that it can be patched.
+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
+{
+ // skip the movptr in MacroAssembler::ic_call():
+ // lui + addi + slli + addi + slli + addi
+ // Though movptr() is already 4-byte aligned with or without RVC,
+ // we guard against future changes by explicitly accounting for its size.
+ const int movptr_size = 6 * NativeInstruction::instruction_size;
+ current_offset += movptr_size;
+ // to make sure the address of jal 4-byte aligned.
+ return align_up(current_offset, alignment_required()) - current_offset;
+}
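+
+// A worked example of the padding computation above (assuming, as the call
+// rules are expected to request via ins_alignment, that alignment_required()
+// returns 4): with RVC the node may start at a 2-byte boundary, say
+// current_offset == 2; adding the 24-byte movptr gives 26, and
+// align_up(26, 4) == 28, so 2 bytes of padding are emitted and the jal
+// itself ends up 4-byte aligned.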
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+ assert_cond(st != NULL);
+ st->print("BREAKPOINT");
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ C2_MacroAssembler _masm(&cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ __ ebreak();
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+ return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
+ st->print("nop \t# %d bytes pad for loops and calls", _count);
+ }
+#endif
+
+ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
+ C2_MacroAssembler _masm(&cbuf);
+ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes.
+ for (int i = 0; i < _count; i++) {
+ __ nop();
+ }
+ }
+
+ uint MachNopNode::size(PhaseRegAlloc*) const {
+ return _count * (UseRVC ? NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size);
+ }
+
+//=============================================================================
+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
+
+int ConstantTable::calculate_table_base_offset() const {
+ return 0; // absolute addressing, no offset
+}
+
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
+void MachConstantBaseNode::postalloc_expand(GrowableArray<Node*>* nodes, PhaseRegAlloc *ra_) {
+ ShouldNotReachHere();
+}
+
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+ // Empty encoding
+}
+
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
+ return 0;
+}
+
+#ifndef PRODUCT
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+ assert_cond(st != NULL);
+ st->print("-- \t// MachConstantBaseNode (empty encoding)");
+}
+#endif
+
+#ifndef PRODUCT
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+ assert_cond(st != NULL && ra_ != NULL);
+ Compile* C = ra_->C;
+
+ int framesize = C->output()->frame_slots() << LogBytesPerInt;
+
+ if (C->output()->need_stack_bang(framesize)) {
+ st->print("# stack bang size=%d\n\t", framesize);
+ }
+
+ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize);
+ st->print("sd ra, [sp, #%d]\n\t", - wordSize);
+ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); }
+ st->print("sub sp, sp, #%d\n\t", framesize);
+
+ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
+ st->print("ld t0, [guard]\n\t");
+ st->print("membar LoadLoad\n\t");
+ st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t");
+ st->print("beq t0, t1, skip\n\t");
+ st->print("jalr #nmethod_entry_barrier_stub\n\t");
+ st->print("j skip\n\t");
+ st->print("guard: int\n\t");
+ st->print("skip:\n\t");
+ }
+}
+#endif
+
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ assert_cond(ra_ != NULL);
+ Compile* C = ra_->C;
+ C2_MacroAssembler _masm(&cbuf);
+
+ // n.b. frame size includes space for return pc and fp
+ const int framesize = C->output()->frame_size_in_bytes();
+
+ // insert a nop at the start of the prolog so we can patch in a
+ // branch if we need to invalidate the method later
+ __ nop();
+
+ assert_cond(C != NULL);
+
+ if (C->clinit_barrier_on_entry()) {
+ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
+
+ Label L_skip_barrier;
+
+ __ mov_metadata(t1, C->method()->holder()->constant_encoding());
+ __ clinit_barrier(t1, t0, &L_skip_barrier);
+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+ __ bind(L_skip_barrier);
+ }
+
+ int bangsize = C->output()->bang_size_in_bytes();
+ if (C->output()->need_stack_bang(bangsize)) {
+ __ generate_stack_overflow_check(bangsize);
+ }
+
+ __ build_frame(framesize);
+
+ if (C->stub_function() == NULL) {
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->nmethod_entry_barrier(&_masm);
+ }
+
+ if (VerifyStackAtCalls) {
+ Unimplemented();
+ }
+
+ C->output()->set_frame_complete(cbuf.insts_size());
+
+ if (C->has_mach_constant_base_node()) {
+ // NOTE: We set the table base offset here because users might be
+ // emitted before MachConstantBaseNode.
+ ConstantTable& constant_table = C->output()->constant_table();
+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+ }
+}
+
+uint MachPrologNode::size(PhaseRegAlloc* ra_) const
+{
+ assert_cond(ra_ != NULL);
+ return MachNode::size(ra_); // too many variables; just compute it
+ // the hard way
+}
+
+int MachPrologNode::reloc() const
+{
+ return 0;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+ assert_cond(st != NULL && ra_ != NULL);
+ Compile* C = ra_->C;
+ assert_cond(C != NULL);
+ int framesize = C->output()->frame_size_in_bytes();
+
+ st->print("# pop frame %d\n\t", framesize);
+
+ if (framesize == 0) {
+ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize));
+ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize));
+ st->print("add sp, sp, #%d\n\t", (2 * wordSize));
+ } else {
+ st->print("add sp, sp, #%d\n\t", framesize);
+ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize);
+ st->print("ld fp, [sp,#%d]\n\t", - wordSize);
+ }
+
+ if (do_polling() && C->is_method_compilation()) {
+ st->print("# test polling word\n\t");
+ st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset()));
+ st->print("bgtu sp, t0, #slow_path");
+ }
+}
+#endif
+
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ assert_cond(ra_ != NULL);
+ Compile* C = ra_->C;
+ C2_MacroAssembler _masm(&cbuf);
+ assert_cond(C != NULL);
+ int framesize = C->output()->frame_size_in_bytes();
+
+ __ remove_frame(framesize);
+
+ if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
+ __ reserved_stack_check();
+ }
+
+ if (do_polling() && C->is_method_compilation()) {
+ Label dummy_label;
+ Label* code_stub = &dummy_label;
+ if (!C->output()->in_scratch_emit_size()) {
+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
+ }
+ __ relocate(relocInfo::poll_return_type);
+ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
+ }
+}
+
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+ assert_cond(ra_ != NULL);
+ // Variable size. Determine dynamically.
+ return MachNode::size(ra_);
+}
+
+int MachEpilogNode::reloc() const {
+ // Return number of relocatable values contained in this instruction.
+ return 1; // 1 for polling page.
+}
+const Pipeline * MachEpilogNode::pipeline() const {
+ return MachNode::pipeline_class();
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float or
+// rc_stack.
+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack };
+
+static enum RC rc_class(OptoReg::Name reg) {
+
+ if (reg == OptoReg::Bad) {
+ return rc_bad;
+ }
+
+ // we have 30 int registers * 2 halves
+ // (t0 and t1 are omitted)
+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
+ if (reg < slots_of_int_registers) {
+ return rc_int;
+ }
+
+ // we have 32 float registers * 2 halves
+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
+ if (reg < slots_of_int_registers + slots_of_float_registers) {
+ return rc_float;
+ }
+
+ // we have 32 vector registers * 4 slots
+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers;
+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) {
+ return rc_vector;
+ }
+
+ // Between vector regs & stack is the flags regs.
+ assert(OptoReg::is_stack(reg), "blow up if spilling flags");
+
+ return rc_stack;
+}
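+
+// For orientation, a sketch assuming the usual RV64 slot counts (2 VMReg
+// slots per general-purpose and float register, 4 per vector register,
+// 32 registers per file, with t0/t1 excluded from the int count):
+// rc_class() then partitions the OptoReg space roughly as
+//   [0, 60)    -> rc_int
+//   [60, 124)  -> rc_float
+//   [124, 252) -> rc_vector
+// followed by the flags register and finally the stack slots.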
+
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
+ assert_cond(ra_ != NULL);
+ Compile* C = ra_->C;
+
+ // Get registers to move.
+ OptoReg::Name src_hi = ra_->get_reg_second(in(1));
+ OptoReg::Name src_lo = ra_->get_reg_first(in(1));
+ OptoReg::Name dst_hi = ra_->get_reg_second(this);
+ OptoReg::Name dst_lo = ra_->get_reg_first(this);
+
+ enum RC src_hi_rc = rc_class(src_hi);
+ enum RC src_lo_rc = rc_class(src_lo);
+ enum RC dst_hi_rc = rc_class(dst_hi);
+ enum RC dst_lo_rc = rc_class(dst_lo);
+
+ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
+
+ if (src_hi != OptoReg::Bad) {
+ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi &&
+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi,
+ "expected aligned-adjacent pairs");
+ }
+
+ if (src_lo == dst_lo && src_hi == dst_hi) {
+ return 0; // Self copy, no move.
+ }
+
+ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
+ int src_offset = ra_->reg2offset(src_lo);
+ int dst_offset = ra_->reg2offset(dst_lo);
+
+ if (bottom_type()->isa_vect() != NULL) {
+ uint ireg = ideal_reg();
+ if (ireg == Op_VecA && cbuf) {
+ C2_MacroAssembler _masm(cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+ // stack to stack
+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset,
+ vector_reg_size_in_bytes);
+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) {
+ // vpr to stack
+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo));
+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) {
+ // stack to vpr
+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo));
+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) {
+ // vpr to vpr
+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+ } else if (cbuf != NULL) {
+ C2_MacroAssembler _masm(cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ switch (src_lo_rc) {
+ case rc_int:
+ if (dst_lo_rc == rc_int) { // gpr --> gpr copy
+ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass
+ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32);
+ } else {
+ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]));
+ }
+ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
+ if (is64) {
+ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_Register(Matcher::_regEncode[src_lo]));
+ } else {
+ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_Register(Matcher::_regEncode[src_lo]));
+ }
+ } else { // gpr --> stack spill
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
+ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
+ }
+ break;
+ case rc_float:
+ if (dst_lo_rc == rc_int) { // fpr --> gpr copy
+ if (is64) {
+ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ }
+ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
+ if (is64) {
+ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ }
+ } else { // fpr --> stack spill
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
+ is64, dst_offset);
+ }
+ break;
+ case rc_stack:
+ if (dst_lo_rc == rc_int) { // stack --> gpr load
+ if (this->ideal_reg() == Op_RegI) {
+ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+ } else { // zero extended for narrow oop or klass
+ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+ }
+ } else if (dst_lo_rc == rc_float) { // stack --> fpr load
+ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ is64, src_offset);
+ } else { // stack --> stack copy
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
+ if (this->ideal_reg() == Op_RegI) {
+ __ unspill(t0, is64, src_offset);
+ } else { // zero extended for narrow oop or klass
+ __ unspillu(t0, is64, src_offset);
+ }
+ __ spill(t0, is64, dst_offset);
+ }
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ if (st != NULL) {
+ st->print("spill ");
+ if (src_lo_rc == rc_stack) {
+ st->print("[sp, #%d] -> ", src_offset);
+ } else {
+ st->print("%s -> ", Matcher::regName[src_lo]);
+ }
+ if (dst_lo_rc == rc_stack) {
+ st->print("[sp, #%d]", dst_offset);
+ } else {
+ st->print("%s", Matcher::regName[dst_lo]);
+ }
+ if (bottom_type()->isa_vect() != NULL) {
+ int vsize = 0;
+ if (ideal_reg() == Op_VecA) {
+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
+ } else {
+ ShouldNotReachHere();
+ }
+ st->print("\t# vector spill size = %d", vsize);
+ } else {
+ st->print("\t# spill size = %d", is64 ? 64 : 32);
+ }
+ }
+
+ return 0;
+}
+
+#ifndef PRODUCT
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+ if (ra_ == NULL) {
+ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
+ } else {
+ implementation(NULL, ra_, false, st);
+ }
+}
+#endif
+
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ implementation(&cbuf, ra_, false, NULL);
+}
+
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+ return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+ assert_cond(ra_ != NULL && st != NULL);
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+ int reg = ra_->get_reg_first(this);
+ st->print("add %s, sp, #%d\t# box lock",
+ Matcher::regName[reg], offset);
+}
+#endif
+
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ C2_MacroAssembler _masm(&cbuf);
+
+ assert_cond(ra_ != NULL);
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+ int reg = ra_->get_encode(this);
+
+ if (is_imm_in_range(offset, 12, 0)) {
+ __ addi(as_Register(reg), sp, offset);
+ } else if (is_imm_in_range(offset, 32, 0)) {
+ __ li32(t0, offset);
+ __ add(as_Register(reg), sp, t0);
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+
+ if (is_imm_in_range(offset, 12, 0)) {
+ return NativeInstruction::instruction_size;
+ } else {
+ return 3 * NativeInstruction::instruction_size; // lui + addiw + add;
+ }
+}
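+
+// Worked example with illustrative offsets: a box-lock slot at offset 1024
+// fits the signed 12-bit addi immediate and is emitted as a single addi,
+// while an offset of 100000 takes the li32 (lui + addiw) plus add path,
+// i.e. three instructions, matching the sizes returned above.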
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+{
+ assert_cond(st != NULL);
+ st->print_cr("# MachUEPNode");
+ if (UseCompressedClassPointers) {
+ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+ if (CompressedKlassPointers::shift() != 0) {
+ st->print_cr("\tdecode_klass_not_null t0, t0");
+ }
+ } else {
+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+ }
+ st->print_cr("\tbeq t0, t1, ic_hit");
+ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check");
+ st->print_cr("\tic_hit:");
+}
+#endif
+
+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+{
+ // This is the unverified entry point.
+ C2_MacroAssembler _masm(&cbuf);
+
+ Label skip;
+ __ cmp_klass(j_rarg0, t1, t0, skip);
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ bind(skip);
+}
+
+uint MachUEPNode::size(PhaseRegAlloc* ra_) const
+{
+ assert_cond(ra_ != NULL);
+ return MachNode::size(ra_);
+}
+
+// REQUIRED EMIT CODE
+
+//=============================================================================
+
+// Emit exception handler code.
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
+{
+ // la_patchable t0, #exception_blob_entry_point
+ // jr (offset)t0
+ // or
+ // j #exception_blob_entry_point
+ // Note that the code buffer's insts_mark is always relative to insts.
+ // That's why we must use the macroassembler to generate a handler.
+ C2_MacroAssembler _masm(&cbuf);
+ address base = __ start_a_stub(size_exception_handler());
+ if (base == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return 0; // CodeBuffer::expand failed
+ }
+ int offset = __ offset();
+ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+ __ end_a_stub();
+ return offset;
+}
+
+// Emit deopt handler code.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
+{
+ // Note that the code buffer's insts_mark is always relative to insts.
+ // That's why we must use the macroassembler to generate a handler.
+ C2_MacroAssembler _masm(&cbuf);
+ address base = __ start_a_stub(size_deopt_handler());
+ if (base == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return 0; // CodeBuffer::expand failed
+ }
+ int offset = __ offset();
+
+ __ auipc(ra, 0);
+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+ __ end_a_stub();
+ return offset;
+
+}
+// REQUIRED MATCHER CODE
+
+//=============================================================================
+
+const bool Matcher::match_rule_supported(int opcode) {
+ if (!has_match_rule(opcode)) {
+ return false;
+ }
+
+ switch (opcode) {
+ case Op_CacheWB: // fall through
+ case Op_CacheWBPreSync: // fall through
+ case Op_CacheWBPostSync:
+ if (!VM_Version::supports_data_cache_line_flush()) {
+ return false;
+ }
+ break;
+
+ case Op_StrCompressedCopy: // fall through
+ case Op_StrInflatedCopy: // fall through
+ case Op_CountPositives:
+ return UseRVV;
+
+ case Op_EncodeISOArray:
+ return UseRVV && SpecialEncodeISOArray;
+
+ case Op_PopCountI:
+ case Op_PopCountL:
+ return UsePopCountInstruction;
+
+ case Op_RotateRight:
+ case Op_RotateLeft:
+ case Op_CountLeadingZerosI:
+ case Op_CountLeadingZerosL:
+ case Op_CountTrailingZerosI:
+ case Op_CountTrailingZerosL:
+ return UseZbb;
+ }
+
+ return true; // Per default match rules are supported.
+}
+
+// Identify extra cases that we might want to provide match rules for vector nodes and
+// other intrinsics guarded with vector length (vlen) and element type (bt).
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
+ return false;
+ }
+
+ return op_vec_supported(opcode);
+}
+
+const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
+ return false;
+}
+
+const RegMask* Matcher::predicate_reg_mask(void) {
+ return NULL;
+}
+
+const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
+ return NULL;
+}
+
+// Vector calling convention not yet implemented.
+const bool Matcher::supports_vector_calling_convention(void) {
+ return false;
+}
+
+OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
+ Unimplemented();
+ return OptoRegPair(0, 0);
+}
+
+// Is this branch offset short enough that a short branch can be used?
+//
+// NOTE: If the platform does not provide any short branch variants, then
+// this method should return false for offset 0.
+// |---label(L1)-----|
+// |-----------------|
+// |-----------------|----------eq: float-------------------
+// |-----------------| // far_cmpD_branch | cmpD_branch
+// |------- ---------| feq; | feq;
+// |-far_cmpD_branch-| beqz done; | bnez L;
+// |-----------------| j L; |
+// |-----------------| bind(done); |
+// |-----------------|--------------------------------------
+// |-----------------| // so shortBrSize = br_size - 4;
+// |-----------------| // so offs = offset - shortBrSize + 4;
+// |---label(L2)-----|
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+ // The passed offset is relative to address of the branch.
+ int shortBrSize = br_size - 4;
+ int offs = offset - shortBrSize + 4;
+ return (-4096 <= offs && offs < 4096);
+}
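+
+// Worked example with hypothetical numbers: for a far float-compare branch
+// whose br_size is 8 bytes and whose short variant is 4 bytes,
+// shortBrSize == 4, so an offset of 4000 bytes gives offs == 4000, which is
+// inside the +/-4 KiB range of a conditional branch and the short form is
+// chosen; an offset of 5000 bytes falls outside and the far form is kept.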
+
+// Vector width in bytes.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+ if (UseRVV) {
+ // MaxVectorSize should have been set by detecting the RVV max vector register size when checking UseRVV.
+ // MaxVectorSize == VM_Version::_initial_vector_length
+ return MaxVectorSize;
+ }
+ return 0;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+ return vector_width_in_bytes(bt) / type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+ return max_vector_size(bt);
+}
+
+// Vector ideal reg.
+const uint Matcher::vector_ideal_reg(int len) {
+ assert(MaxVectorSize >= len, "");
+ if (UseRVV) {
+ return Op_VecA;
+ }
+
+ ShouldNotReachHere();
+ return 0;
+}
+
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
+ return Matcher::max_vector_size(bt);
+}
+
+MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
+ ShouldNotReachHere(); // generic vector operands not supported
+ return NULL;
+}
+
+bool Matcher::is_reg2reg_move(MachNode* m) {
+ ShouldNotReachHere(); // generic vector operands not supported
+ return false;
+}
+
+bool Matcher::is_generic_vector(MachOper* opnd) {
+ ShouldNotReachHere(); // generic vector operands not supported
+ return false;
+}
+
+// Return whether or not this register is ever used as an argument.
+// This function is used on startup to build the trampoline stubs in
+// generateOptoStub. Registers not mentioned will be killed by the VM
+// call in the trampoline, and arguments in those registers will not be
+// available to the callee.
+bool Matcher::can_be_java_arg(int reg)
+{
+ return
+ reg == R10_num || reg == R10_H_num ||
+ reg == R11_num || reg == R11_H_num ||
+ reg == R12_num || reg == R12_H_num ||
+ reg == R13_num || reg == R13_H_num ||
+ reg == R14_num || reg == R14_H_num ||
+ reg == R15_num || reg == R15_H_num ||
+ reg == R16_num || reg == R16_H_num ||
+ reg == R17_num || reg == R17_H_num ||
+ reg == F10_num || reg == F10_H_num ||
+ reg == F11_num || reg == F11_H_num ||
+ reg == F12_num || reg == F12_H_num ||
+ reg == F13_num || reg == F13_H_num ||
+ reg == F14_num || reg == F14_H_num ||
+ reg == F15_num || reg == F15_H_num ||
+ reg == F16_num || reg == F16_H_num ||
+ reg == F17_num || reg == F17_H_num;
+}
+
+bool Matcher::is_spillable_arg(int reg)
+{
+ return can_be_java_arg(reg);
+}
+
+uint Matcher::int_pressure_limit()
+{
+ // A derived pointer is live at CallNode and then is flagged by RA
+ // as a spilled LRG. Spilling heuristics (Spill-USE) explicitly skip
+ // derived pointers and eventually fail to spill after reaching the maximum
+ // number of iterations. Lowering the default pressure threshold to
+ // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become
+ // a high register pressure area of the code so that split_DEF can
+ // generate DefinitionSpillCopy for the derived pointer.
+ uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1;
+ if (!PreserveFramePointer) {
+ // When PreserveFramePointer is off, frame pointer is allocatable,
+ // but different from other SOC registers, it is excluded from
+ // fatproj's mask because its save type is No-Save. Decrease by 1 to
+ // ensure high pressure at fatproj when PreserveFramePointer is off.
+ // See check_pressure_at_fatproj().
+ default_int_pressure_threshold--;
+ }
+ return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE;
+}
+
+uint Matcher::float_pressure_limit()
+{
+ // _FLOAT_REG_mask is generated by adlc from the float_reg register class.
+ return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE;
+}
+
+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
+ return false;
+}
+
+RegMask Matcher::divI_proj_mask() {
+ ShouldNotReachHere();
+ return RegMask();
+}
+
+// Register for MODI projection of divmodI.
+RegMask Matcher::modI_proj_mask() {
+ ShouldNotReachHere();
+ return RegMask();
+}
+
+// Register for DIVL projection of divmodL.
+RegMask Matcher::divL_proj_mask() {
+ ShouldNotReachHere();
+ return RegMask();
+}
+
+// Register for MODL projection of divmodL.
+RegMask Matcher::modL_proj_mask() {
+ ShouldNotReachHere();
+ return RegMask();
+}
+
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+ return FP_REG_mask();
+}
+
+bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
+ assert_cond(addp != NULL);
+ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
+ Node* u = addp->fast_out(i);
+ if (u != NULL && u->is_Mem()) {
+ int opsize = u->as_Mem()->memory_size();
+ assert(opsize > 0, "unexpected memory operand size");
+ if (u->as_Mem()->memory_size() != (1 << shift)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
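+
+// For illustration (a hedged sketch, not quoting a specific operand rule):
+// this predicate is meant to guard scaled-index addressing shapes such as
+// (AddP base (LShiftL (ConvI2L idx) scale)), where folding the shift into
+// the address is only profitable/legal when every memory user of the AddP
+// accesses exactly (1 << scale) bytes, e.g. scale == 3 only for 8-byte
+// loads and stores.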
+
+// Should the Matcher clone input 'm' of node 'n'?
+bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
+ assert_cond(m != NULL);
+ if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
+ mstack.push(m, Visit); // m = ShiftCntV
+ return true;
+ }
+ return false;
+}
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+ return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+%}
+
+
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to
+// output byte streams. Encoding classes are parameterized macros
+// used by Machine Instruction Nodes in order to generate the bit
+// encoding of the instruction. Operands specify their base encoding
+// interface with the interface keyword. There are currently four
+// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
+// COND_INTER. REG_INTER causes an operand to generate a function
+// which returns its register number when queried. CONST_INTER causes
+// an operand to generate a function which returns the value of the
+// constant when queried. MEMORY_INTER causes an operand to generate
+// four functions which return the Base Register, the Index Register,
+// the Scale Value, and the Offset Value of the operand when queried.
+// COND_INTER causes an operand to generate six functions which return
+// the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional
+// instruction.
+//
+// Instructions specify two basic values for encoding. Again, a
+// function is available to check if the constant displacement is an
+// oop. They use the ins_encode keyword to specify their encoding
+// classes (which must be a sequence of enc_class names, and their
+// parameters, specified in the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode. Only the opcode sections which a particular
+// instruction needs for encoding need to be specified.
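+//
+// A minimal sketch of an operand using one of these interfaces (hypothetical,
+// for illustration only; the real operand definitions appear later in this
+// file):
+//
+//   operand iRegINoSp() %{
+//     constraint(ALLOC_IN_RC(no_special_reg32));
+//     match(RegI);
+//     op_cost(0);
+//     format %{ %}
+//     interface(REG_INTER);
+//   %}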
+encode %{
+ // BEGIN Non-volatile memory access
+
+ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ int64_t con = (int64_t)$src$$constant;
+ Register dst_reg = as_Register($dst$$reg);
+ __ li(dst_reg, con);
+ %}
+
+ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ address con = (address)$src$$constant;
+ if (con == NULL || con == (address)1) {
+ ShouldNotReachHere();
+ } else {
+ relocInfo::relocType rtype = $src->constant_reloc();
+ if (rtype == relocInfo::oop_type) {
+ __ movoop(dst_reg, (jobject)con, /*immediate*/true);
+ } else if (rtype == relocInfo::metadata_type) {
+ __ mov_metadata(dst_reg, (Metadata*)con);
+ } else {
+ assert(rtype == relocInfo::none, "unexpected reloc type");
+ __ li(dst_reg, $src$$constant);
+ }
+ }
+ %}
+
+ enc_class riscv_enc_mov_p1(iRegP dst) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ Register dst_reg = as_Register($dst$$reg);
+ __ li(dst_reg, 1);
+ %}
+
+ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ load_byte_map_base($dst$$Register);
+ %}
+
+ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ address con = (address)$src$$constant;
+ if (con == NULL) {
+ ShouldNotReachHere();
+ } else {
+ relocInfo::relocType rtype = $src->constant_reloc();
+ assert(rtype == relocInfo::oop_type, "unexpected reloc type");
+ __ set_narrow_oop(dst_reg, (jobject)con);
+ }
+ %}
+
+ enc_class riscv_enc_mov_zero(iRegNorP dst) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ __ mv(dst_reg, zr);
+ %}
+
+ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ address con = (address)$src$$constant;
+ if (con == NULL) {
+ ShouldNotReachHere();
+ } else {
+ relocInfo::relocType rtype = $src->constant_reloc();
+ assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
+ __ set_narrow_klass(dst_reg, (Klass *)con);
+ }
+ %}
+
+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ %}
+
+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ %}
+
+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ %}
+
+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ %}
+
+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ %}
+
+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ %}
+
+ // compare and branch instruction encodings
+
+ enc_class riscv_enc_j(label lbl) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Label* L = $lbl$$label;
+ __ j(*L);
+ %}
+
+ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Label* L = $lbl$$label;
+ switch ($cmp$$cmpcode) {
+ case(BoolTest::ge):
+ __ j(*L);
+ break;
+ case(BoolTest::lt):
+ break;
+ default:
+ Unimplemented();
+ }
+ %}
+
+ // call instruction encodings
+
+ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{
+ Register sub_reg = as_Register($sub$$reg);
+ Register super_reg = as_Register($super$$reg);
+ Register temp_reg = as_Register($temp$$reg);
+ Register result_reg = as_Register($result$$reg);
+ Register cr_reg = t1;
+
+ Label miss;
+ Label done;
+ C2_MacroAssembler _masm(&cbuf);
+ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
+ NULL, &miss);
+ if ($primary) {
+ __ mv(result_reg, zr);
+ } else {
+ __ mv(cr_reg, zr);
+ __ j(done);
+ }
+
+ __ bind(miss);
+ if (!$primary) {
+ __ li(cr_reg, 1);
+ }
+
+ __ bind(done);
+ %}
+
+ enc_class riscv_enc_java_static_call(method meth) %{
+ C2_MacroAssembler _masm(&cbuf);
+
+ address addr = (address)$meth$$method;
+ address call = NULL;
+ assert_cond(addr != NULL);
+ if (!_method) {
+ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ } else {
+ int method_index = resolved_method_index(cbuf);
+ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
+ : static_call_Relocation::spec(method_index);
+ call = __ trampoline_call(Address(addr, rspec), &cbuf);
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+
+ // Emit stub for static call
+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
+ if (stub == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ }
+ %}
+
+ enc_class riscv_enc_java_dynamic_call(method meth) %{
+ C2_MacroAssembler _masm(&cbuf);
+ int method_index = resolved_method_index(cbuf);
+ address call = __ ic_call((address)$meth$$method, method_index);
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ %}
+
+ enc_class riscv_enc_call_epilog() %{
+ C2_MacroAssembler _masm(&cbuf);
+ if (VerifyStackAtCalls) {
+ // Check that stack depth is unchanged: find magic cookie on stack
+ __ call_Unimplemented();
+ }
+ %}
+
+ enc_class riscv_enc_java_to_runtime(method meth) %{
+ C2_MacroAssembler _masm(&cbuf);
+
+ // Some calls to generated routines (arraycopy code) are scheduled
+ // by C2 as runtime calls. If so, we can call them using a jr (they
+ // will be in a reachable segment); otherwise we have to use a jalr,
+ // which loads the absolute address into a register.
+ address entry = (address)$meth$$method;
+ CodeBlob *cb = CodeCache::find_blob(entry);
+ if (cb != NULL) {
+ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ } else {
+ Label retaddr;
+ __ la(t1, retaddr);
+ __ la(t0, RuntimeAddress(entry));
+ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
+ __ addi(sp, sp, -2 * wordSize);
+ __ sd(t1, Address(sp, wordSize));
+ __ jalr(t0);
+ __ bind(retaddr);
+ __ addi(sp, sp, 2 * wordSize);
+ }
+ %}
+
+ // Uses the cr register (t1) as the boolean result: 0 for success, non-zero for failure.
+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register flag = t1;
+ Register oop = as_Register($object$$reg);
+ Register box = as_Register($box$$reg);
+ Register disp_hdr = as_Register($tmp1$$reg);
+ Register tmp = as_Register($tmp2$$reg);
+ Label cont;
+ Label object_has_monitor;
+
+ assert_different_registers(oop, box, tmp, disp_hdr, t0);
+
+ // Load markWord from object into displaced_header.
+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
+
+ if (DiagnoseSyncOnValueBasedClasses != 0) {
+ __ load_klass(flag, oop);
+ __ lwu(flag, Address(flag, Klass::access_flags_offset()));
+ __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */);
+ __ bnez(flag, cont, true /* is_far */);
+ }
+
+ // Check for existing monitor
+ __ andi(t0, disp_hdr, markWord::monitor_value);
+ __ bnez(t0, object_has_monitor);
+
+ if (!UseHeavyMonitors) {
+ // Set tmp to be (markWord of object | UNLOCK_VALUE).
+ __ ori(tmp, disp_hdr, markWord::unlocked_value);
+
+ // Initialize the box. (Must happen before we update the object mark!)
+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+ // Compare object markWord with an unlocked value (tmp) and if
+ // equal exchange the stack address of our box with object markWord.
+ // On failure disp_hdr contains the possibly locked markWord.
+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq,
+ Assembler::rl, /*result*/disp_hdr);
+ __ mv(flag, zr);
+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+ // If the compare-and-exchange succeeded, then we found an unlocked
+ // object, have now locked it, and will continue at label cont.
+ // Otherwise we did not see an unlocked object, so try the fast recursive case.
+
+ // Check if the owner is self by comparing the value in the
+ // markWord of object (disp_hdr) with the stack pointer.
+ __ sub(disp_hdr, disp_hdr, sp);
+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place));
+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont,
+ // hence we can store 0 as the displaced header in the box, which indicates that it is a
+ // recursive lock.
+ __ andr(tmp/*==0?*/, disp_hdr, tmp);
+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+ __ mv(flag, tmp); // we can use the value of tmp as the result here
+ } else {
+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path
+ }
+
+ __ j(cont);
+
+ // Handle existing monitor.
+ __ bind(object_has_monitor);
+ // The object's monitor m is unlocked iff m->owner == NULL,
+ // otherwise m->owner may contain a thread or a stack address.
+ //
+ // Try to CAS m->owner from NULL to current thread.
+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value));
+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq,
+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected)
+
+ // Store a non-null value into the box to avoid looking like a re-entrant
+ // lock. The fast-path monitor unlock code checks for
+ // markWord::monitor_value so use markWord::unused_mark which has the
+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
+ __ mv(tmp, (address)markWord::unused_mark().value());
+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+ __ beqz(flag, cont); // CAS success means locking succeeded
+
+ __ bne(flag, xthread, cont); // Check for recursive locking
+
+ // Recursive lock case
+ __ mv(flag, zr);
+ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value));
+ __ add(tmp, tmp, 1u);
+ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value));
+
+ __ bind(cont);
+ %}
+
+ // Uses the cr flag (t1) to indicate the fast_unlock result: 0 for success, non-zero for failure.
+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register flag = t1;
+ Register oop = as_Register($object$$reg);
+ Register box = as_Register($box$$reg);
+ Register disp_hdr = as_Register($tmp1$$reg);
+ Register tmp = as_Register($tmp2$$reg);
+ Label cont;
+ Label object_has_monitor;
+
+ assert_different_registers(oop, box, tmp, disp_hdr, flag);
+
+ if (!UseHeavyMonitors) {
+ // Find the lock address and load the displaced header from the stack.
+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+ // If the displaced header is 0, we have a recursive unlock.
+ __ mv(flag, disp_hdr);
+ __ beqz(disp_hdr, cont);
+ }
+
+ // Handle existing monitor.
+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
+ __ andi(t0, disp_hdr, markWord::monitor_value);
+ __ bnez(t0, object_has_monitor);
+
+ if (!UseHeavyMonitors) {
+ // Check if it is still a lightweight lock; this is true if we
+ // see the stack address of the basicLock in the markWord of the
+ // object.
+
+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed,
+ Assembler::rl, /*result*/tmp);
+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds
+ } else {
+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path
+ }
+ __ j(cont);
+
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+ // Handle existing monitor.
+ __ bind(object_has_monitor);
+ STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
+ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor
+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+
+ Label notRecursive;
+ __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive.
+
+ // Recursive lock
+ __ addi(disp_hdr, disp_hdr, -1);
+ __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+ __ mv(flag, zr);
+ __ j(cont);
+
+ __ bind(notRecursive);
+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0.
+ __ bnez(flag, cont);
+ // need a release store here
+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ __ sd(zr, Address(tmp)); // set unowned
+
+ __ bind(cont);
+ %}
+
+ // arithmetic encodings
+
+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ Register src1_reg = as_Register($src1$$reg);
+ Register src2_reg = as_Register($src2$$reg);
+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false);
+ %}
+
+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ Register src1_reg = as_Register($src1$$reg);
+ Register src2_reg = as_Register($src2$$reg);
+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false);
+ %}
+
+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ Register src1_reg = as_Register($src1$$reg);
+ Register src2_reg = as_Register($src2$$reg);
+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true);
+ %}
+
+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Register dst_reg = as_Register($dst$$reg);
+ Register src1_reg = as_Register($src1$$reg);
+ Register src2_reg = as_Register($src2$$reg);
+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true);
+ %}
+
+ enc_class riscv_enc_tail_call(iRegP jump_target) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ Register target_reg = as_Register($jump_target$$reg);
+ __ jr(target_reg);
+ %}
+
+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{
+ C2_MacroAssembler _masm(&cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ Register target_reg = as_Register($jump_target$$reg);
+ // exception oop should be in x10
+ // ret addr has been popped into ra
+ // callee expects it in x13
+ __ mv(x13, ra);
+ __ jr(target_reg);
+ %}
+
+ enc_class riscv_enc_rethrow() %{
+ C2_MacroAssembler _masm(&cbuf);
+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
+ %}
+
+ enc_class riscv_enc_ret() %{
+ C2_MacroAssembler _masm(&cbuf);
+ Assembler::CompressibleRegion cr(&_masm);
+ __ ret();
+ %}
+
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+// S T A C K L A Y O U T Allocators stack-slot number
+// | (to get allocators register number
+// G Owned by | | v add OptoReg::stack0())
+// r CALLER | |
+// o | +--------+ pad to even-align allocators stack-slot
+// w V | pad0 | numbers; owned by CALLER
+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned
+// h ^ | in | 5
+// | | args | 4 Holes in incoming args owned by SELF
+// | | | | 3
+// | | +--------+
+// V | | old out| Empty on Intel, window on Sparc
+// | old |preserve| Must be even aligned.
+// | SP-+--------+----> Matcher::_old_SP, even aligned
+// | | in | 3 area for Intel ret address
+// Owned by |preserve| Empty on Sparc.
+// SELF +--------+
+// | | pad2 | 2 pad to align old SP
+// | +--------+ 1
+// | | locks | 0
+// | +--------+----> OptoReg::stack0(), even aligned
+// | | pad1 | 11 pad to align new SP
+// | +--------+
+// | | | 10
+// | | spills | 9 spills
+// V | | 8 (pad0 slot for callee)
+// -----------+--------+----> Matcher::_out_arg_limit, unaligned
+// ^ | out | 7
+// | | args | 6 Holes in outgoing args owned by CALLEE
+// Owned by +--------+
+// CALLEE | new out| 6 Empty on Intel, window on Sparc
+// | new |preserve| Must be even-aligned.
+// | SP-+--------+----> Matcher::_new_SP, even aligned
+// | | |
+//
+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
+// known from SELF's arguments and the Java calling convention.
+// Region 6-7 is determined per call site.
+// Note 2: If the calling convention leaves holes in the incoming argument
+// area, those holes are owned by SELF. Holes in the outgoing area
+// are owned by the CALLEE. Holes should not be necessary in the
+// incoming area, as the Java calling convention is completely under
+// the control of the AD file. Doubles can be sorted and packed to
+// avoid holes. Holes in the outgoing arguments may be necessary for
+// varargs C calling conventions.
+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
+// even aligned with pad0 as needed.
+// Region 6 is even aligned. Region 6-7 is NOT even aligned;
+// (the latter is true on Intel but is it false on RISCV?)
+// region 6-11 is even aligned; it may be padded out more so that
+// the region from SP to FP meets the minimum stack alignment.
+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
+// alignment. Region 11, pad1, may be dynamically extended so that
+// SP meets the minimum alignment.
+
+frame %{
+ // These three registers define part of the calling convention
+ // between compiled code and the interpreter.
+
+ // Inline Cache Register or methodOop for I2C.
+ inline_cache_reg(R31);
+
+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
+ cisc_spilling_operand_name(indOffset);
+
+ // Number of stack slots consumed by locking an object
+ // generate Compile::sync_stack_slots
+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2
+ sync_stack_slots(1 * VMRegImpl::slots_per_word);
+
+ // Compiled code's Frame Pointer
+ frame_pointer(R2);
+
+ // Interpreter stores its frame pointer in a register which is
+ // stored to the stack by I2CAdaptors.
+ // I2CAdaptors convert from interpreted java to compiled java.
+ interpreter_frame_pointer(R8);
+
+ // Stack alignment requirement
+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
+
+ // Number of outgoing stack slots killed above the out_preserve_stack_slots
+ // for calls to C. Supports the var-args backing area for register parms.
+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt);
+
+ // The after-PROLOG location of the return address. Location of
+ // return address specifies a type (REG or STACK) and a number
+ // representing the register number (i.e. - use a register name) or
+ // stack slot.
+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
+ // Otherwise, it is above the locks and verification slot and alignment word.
+ // TODO: this may well be correct, but we need to check why that - 2 is there;
+ // the ppc port uses 0, but we definitely need to allow for fixed_slots,
+ // which folds in the space used for monitors.
+ return_addr(STACK - 2 +
+ align_up((Compile::current()->in_preserve_stack_slots() +
+ Compile::current()->fixed_slots()),
+ stack_alignment_in_slots()));
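+ // Worked example with purely hypothetical numbers: if in_preserve_stack_slots()
+ // were 4, fixed_slots() were 2 and stack_alignment_in_slots() were 4, this
+ // would evaluate to STACK - 2 + align_up(6, 4) = STACK - 2 + 8 = STACK + 6.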
+
+ // Location of compiled Java return values. Same as C for now.
+ return_value
+ %{
+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
+ "only return normal values");
+
+ static const int lo[Op_RegL + 1] = { // enum name
+ 0, // Op_Node
+ 0, // Op_Set
+ R10_num, // Op_RegN
+ R10_num, // Op_RegI
+ R10_num, // Op_RegP
+ F10_num, // Op_RegF
+ F10_num, // Op_RegD
+ R10_num // Op_RegL
+ };
+
+ static const int hi[Op_RegL + 1] = { // enum name
+ 0, // Op_Node
+ 0, // Op_Set
+ OptoReg::Bad, // Op_RegN
+ OptoReg::Bad, // Op_RegI
+ R10_H_num, // Op_RegP
+ OptoReg::Bad, // Op_RegF
+ F10_H_num, // Op_RegD
+ R10_H_num // Op_RegL
+ };
+
+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
+ %}
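+ // For example, an Op_RegL value is returned in the R10/R10_H pair (x10) and
+ // an Op_RegD value in F10/F10_H (f10), matching the RISC-V calling convention
+ // for integer and floating-point return values.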
+%}
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1); // Required cost attribute
+
+//----------Instruction Attributes---------------------------------------------
+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
+ins_attrib ins_size(32); // Required size attribute (in bits)
+ins_attrib ins_short_branch(0); // Required flag: is this instruction
+ // a non-matching short branch variant
+ // of some long branch?
+ins_attrib ins_alignment(4); // Required alignment attribute (must
+ // be a power of 2) specifies the
+ // alignment that some part of the
+ // instruction (not necessarily the
+ // start) requires. If > 1, a
+ // compute_padding() function must be
+ // provided for the instruction
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+
+// Integer operands 32 bit
+// 32 bit immediate
+operand immI()
+%{
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 32 bit zero
+operand immI0()
+%{
+ predicate(n->get_int() == 0);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 32 bit unit increment
+operand immI_1()
+%{
+ predicate(n->get_int() == 1);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 32 bit unit decrement
+operand immI_M1()
+%{
+ predicate(n->get_int() == -1);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
+operand uimmI6_ge32() %{
+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32));
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_le_4()
+%{
+ predicate(n->get_int() <= 4);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_16()
+%{
+ predicate(n->get_int() == 16);
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_24()
+%{
+ predicate(n->get_int() == 24);
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_31()
+%{
+ predicate(n->get_int() == 31);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_63()
+%{
+ predicate(n->get_int() == 63);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 32 bit integer valid for add immediate
+operand immIAdd()
+%{
+ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int()));
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 32 bit integer valid for sub immediate
+operand immISub()
+%{
+ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int()));
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
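+// Both predicates above boil down to the 12-bit signed immediate accepted by
+// addi/addiw (assuming that is what operand_valid_for_add_immediate checks),
+// so e.g. 2047 matches immIAdd while 4096 must be materialized in a register.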
+
+// 5 bit signed value.
+operand immI5()
+%{
+ predicate(n->get_int() <= 15 && n->get_int() >= -16);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 5 bit signed value (simm5)
+operand immL5()
+%{
+ predicate(n->get_long() <= 15 && n->get_long() >= -16);
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer operands 64 bit
+// 64 bit immediate
+operand immL()
+%{
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 64 bit zero
+operand immL0()
+%{
+ predicate(n->get_long() == 0);
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer operands
+// Pointer Immediate
+operand immP()
+%{
+ match(ConP);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// NULL Pointer Immediate
+operand immP0()
+%{
+ predicate(n->get_ptr() == 0);
+ match(ConP);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Pointer Immediate One
+// this is used in object initialization (initial object header)
+operand immP_1()
+%{
+ predicate(n->get_ptr() == 1);
+ match(ConP);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Card Table Byte Map Base
+operand immByteMapBase()
+%{
+ // Get base of card map
+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
+ (CardTable::CardValue*)n->get_ptr() ==
+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
+ match(ConP);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Int Immediate: low 16-bit mask
+operand immI_16bits()
+%{
+ predicate(n->get_int() == 0xFFFF);
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask
+operand immL_32bits()
+%{
+ predicate(n->get_long() == 0xFFFFFFFFL);
+ match(ConL);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 64 bit unit decrement
+operand immL_M1()
+%{
+ predicate(n->get_long() == -1);
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+
+// 32 bit offset of pc in thread anchor
+
+operand immL_pc_off()
+%{
+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
+ in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for add immediate
+operand immLAdd()
+%{
+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long()));
+ match(ConL);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for sub immediate
+operand immLSub()
+%{
+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long())));
+ match(ConL);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Narrow pointer operands
+// Narrow Pointer Immediate
+operand immN()
+%{
+ match(ConN);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate
+operand immN0()
+%{
+ predicate(n->get_narrowcon() == 0);
+ match(ConN);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immNKlass()
+%{
+ match(ConNKlass);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float and Double operands
+// Double Immediate
+operand immD()
+%{
+ match(ConD);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Double Immediate: +0.0d
+operand immD0()
+%{
+ predicate(jlong_cast(n->getd()) == 0);
+ match(ConD);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate
+operand immF()
+%{
+ match(ConF);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Float Immediate: +0.0f.
+operand immF0()
+%{
+ predicate(jint_cast(n->getf()) == 0);
+ match(ConF);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immIOffset()
+%{
+ predicate(is_imm_in_range(n->get_int(), 12, 0));
+ match(ConI);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immLOffset()
+%{
+ predicate(is_imm_in_range(n->get_long(), 12, 0));
+ match(ConL);
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
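+// is_imm_in_range(x, 12, 0) accepts 12-bit signed values, so these offsets
+// cover [-2048, 2047], the displacement range of RISC-V loads and stores.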
+
+// Scale values
+operand immIScale()
+%{
+ predicate(1 <= n->get_int() && (n->get_int() <= 3));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// Integer 32 bit Register Operands
+operand iRegI()
+%{
+ constraint(ALLOC_IN_RC(any_reg32));
+ match(RegI);
+ match(iRegINoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Integer 32 bit Register not Special
+operand iRegINoSp()
+%{
+ constraint(ALLOC_IN_RC(no_special_reg32));
+ match(RegI);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Register R10 only
+operand iRegI_R10()
+%{
+ constraint(ALLOC_IN_RC(int_r10_reg));
+ match(RegI);
+ match(iRegINoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Register R12 only
+operand iRegI_R12()
+%{
+ constraint(ALLOC_IN_RC(int_r12_reg));
+ match(RegI);
+ match(iRegINoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Register R13 only
+operand iRegI_R13()
+%{
+ constraint(ALLOC_IN_RC(int_r13_reg));
+ match(RegI);
+ match(iRegINoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Register R14 only
+operand iRegI_R14()
+%{
+ constraint(ALLOC_IN_RC(int_r14_reg));
+ match(RegI);
+ match(iRegINoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Integer 64 bit Register Operands
+operand iRegL()
+%{
+ constraint(ALLOC_IN_RC(any_reg));
+ match(RegL);
+ match(iRegLNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Integer 64 bit Register not Special
+operand iRegLNoSp()
+%{
+ constraint(ALLOC_IN_RC(no_special_reg));
+ match(RegL);
+ match(iRegL_R10);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Long 64 bit Register R28 only
+operand iRegL_R28()
+%{
+ constraint(ALLOC_IN_RC(r28_reg));
+ match(RegL);
+ match(iRegLNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Long 64 bit Register R29 only
+operand iRegL_R29()
+%{
+ constraint(ALLOC_IN_RC(r29_reg));
+ match(RegL);
+ match(iRegLNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Long 64 bit Register R30 only
+operand iRegL_R30()
+%{
+ constraint(ALLOC_IN_RC(r30_reg));
+ match(RegL);
+ match(iRegLNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer Register Operands
+// Pointer Register
+operand iRegP()
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(RegP);
+ match(iRegPNoSp);
+ match(iRegP_R10);
+ match(javaThread_RegP);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register not Special
+operand iRegPNoSp()
+%{
+ constraint(ALLOC_IN_RC(no_special_ptr_reg));
+ match(RegP);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand iRegP_R10()
+%{
+ constraint(ALLOC_IN_RC(r10_reg));
+ match(RegP);
+ // match(iRegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R11 only
+operand iRegP_R11()
+%{
+ constraint(ALLOC_IN_RC(r11_reg));
+ match(RegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand iRegP_R12()
+%{
+ constraint(ALLOC_IN_RC(r12_reg));
+ match(RegP);
+ // match(iRegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R13 only
+operand iRegP_R13()
+%{
+ constraint(ALLOC_IN_RC(r13_reg));
+ match(RegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand iRegP_R14()
+%{
+ constraint(ALLOC_IN_RC(r14_reg));
+ match(RegP);
+ // match(iRegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand iRegP_R15()
+%{
+ constraint(ALLOC_IN_RC(r15_reg));
+ match(RegP);
+ // match(iRegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand iRegP_R16()
+%{
+ constraint(ALLOC_IN_RC(r16_reg));
+ match(RegP);
+ // match(iRegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R28 only
+operand iRegP_R28()
+%{
+ constraint(ALLOC_IN_RC(r28_reg));
+ match(RegP);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Pointer Register Operands
+// Narrow Pointer Register
+operand iRegN()
+%{
+ constraint(ALLOC_IN_RC(any_reg32));
+ match(RegN);
+ match(iRegNNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Narrow Pointer Register not Special
+operand iRegNNoSp()
+%{
+ constraint(ALLOC_IN_RC(no_special_reg32));
+ match(RegN);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// heap base register -- used for encoding immN0
+operand iRegIHeapbase()
+%{
+ constraint(ALLOC_IN_RC(heapbase_reg));
+ match(RegI);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Long 64 bit Register R10 only
+operand iRegL_R10()
+%{
+ constraint(ALLOC_IN_RC(r10_reg));
+ match(RegL);
+ match(iRegLNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Float Register
+// Float register operands
+operand fRegF()
+%{
+ constraint(ALLOC_IN_RC(float_reg));
+ match(RegF);
+
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Double Register
+// Double register operands
+operand fRegD()
+%{
+ constraint(ALLOC_IN_RC(double_reg));
+ match(RegD);
+
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Generic vector class. This will be used for
+// all vector operands.
+operand vReg()
+%{
+ constraint(ALLOC_IN_RC(vectora_reg));
+ match(VecA);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vReg_V1()
+%{
+ constraint(ALLOC_IN_RC(v1_reg));
+ match(VecA);
+ match(vReg);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vReg_V2()
+%{
+ constraint(ALLOC_IN_RC(v2_reg));
+ match(VecA);
+ match(vReg);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vReg_V3()
+%{
+ constraint(ALLOC_IN_RC(v3_reg));
+ match(VecA);
+ match(vReg);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vReg_V4()
+%{
+ constraint(ALLOC_IN_RC(v4_reg));
+ match(VecA);
+ match(vReg);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vReg_V5()
+%{
+ constraint(ALLOC_IN_RC(v5_reg));
+ match(VecA);
+ match(vReg);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Java Thread Register
+operand javaThread_RegP(iRegP reg)
+%{
+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg
+ match(reg);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+//----------Memory Operands----------------------------------------------------
+// RISC-V has only base_plus_offset and literal addressing modes, so there is
+// no need for index and scale. Here index is set to 0xffffffff and scale to 0x0.
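+// For example, an (AddP reg off) matched by indOffI below is consumed by the
+// instruction encodings as Address(as_Register($mem$$base), $mem$$disp); the
+// index and scale fields are never used.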
+operand indirect(iRegP reg)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(reg);
+ op_cost(0);
+ format %{ "[$reg]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp(0x0);
+ %}
+%}
+
+operand indOffI(iRegP reg, immIOffset off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffL(iRegP reg, immLOffset off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indirectN(iRegN reg)
+%{
+ predicate(CompressedOops::shift() == 0);
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(DecodeN reg);
+ op_cost(0);
+ format %{ "[$reg]\t# narrow" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp(0x0);
+ %}
+%}
+
+operand indOffIN(iRegN reg, immIOffset off)
+%{
+ predicate(CompressedOops::shift() == 0);
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP (DecodeN reg) off);
+ op_cost(0);
+ format %{ "[$reg, $off]\t# narrow" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffLN(iRegN reg, immLOffset off)
+%{
+ predicate(CompressedOops::shift() == 0);
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP (DecodeN reg) off);
+ op_cost(0);
+ format %{ "[$reg, $off]\t# narrow" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+// RISCV opto stubs need to write to the pc slot in the thread anchor
+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+
+//----------Special Memory Operands--------------------------------------------
+// Stack Slot Operand - This operand is used for loading and storing temporary
+// values on the stack where a match requires a value to
+// flow through memory.
+operand stackSlotI(sRegI reg)
+%{
+ constraint(ALLOC_IN_RC(stack_slots));
+ // No match rule because this operand is only generated in matching
+ // match(RegI);
+ format %{ "[$reg]" %}
+ interface(MEMORY_INTER) %{
+ base(0x02); // sp (x2)
+ index(0xffffffff); // No Index
+ scale(0x0); // No Scale
+ disp($reg); // Stack Offset
+ %}
+%}
+
+operand stackSlotF(sRegF reg)
+%{
+ constraint(ALLOC_IN_RC(stack_slots));
+ // No match rule because this operand is only generated in matching
+ // match(RegF);
+ format %{ "[$reg]" %}
+ interface(MEMORY_INTER) %{
+ base(0x02); // sp (x2)
+ index(0xffffffff); // No Index
+ scale(0x0); // No Scale
+ disp($reg); // Stack Offset
+ %}
+%}
+
+operand stackSlotD(sRegD reg)
+%{
+ constraint(ALLOC_IN_RC(stack_slots));
+ // No match rule because this operand is only generated in matching
+ // match(RegD);
+ format %{ "[$reg]" %}
+ interface(MEMORY_INTER) %{
+ base(0x02); // sp (x2)
+ index(0xffffffff); // No Index
+ scale(0x0); // No Scale
+ disp($reg); // Stack Offset
+ %}
+%}
+
+operand stackSlotL(sRegL reg)
+%{
+ constraint(ALLOC_IN_RC(stack_slots));
+ // No match rule because this operand is only generated in matching
+ // match(RegL);
+ format %{ "[$reg]" %}
+ interface(MEMORY_INTER) %{
+ base(0x02); // sp (x2)
+ index(0xffffffff); // No Index
+ scale(0x0); // No Scale
+ disp($reg); // Stack Offset
+ %}
+%}
+
+// Special operand allowing long args to int ops to be truncated for free
+
+operand iRegL2I(iRegL reg) %{
+
+ op_cost(0);
+
+ match(ConvL2I reg);
+
+ format %{ "l2i($reg)" %}
+
+ interface(REG_INTER)
+%}
+
+
+// Comparison Operands
+// NOTE: Label is a predefined operand which should not be redefined in
+// the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op - This is the operation of the comparison, and is limited to
+// the following set of codes:
+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
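+// For example, a Bool fed by an unsigned CmpU with BoolTest::lt matches cmpOpU
+// below; its COND_INTER supplies the "ltu" code, which a branch rule's
+// encoding ultimately turns into a bltu.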
+
+
+// used for signed integral comparisons and fp comparisons
+operand cmpOp()
+%{
+ match(Bool);
+
+ format %{ "" %}
+
+ // the values in interface derive from struct BoolTest::mask
+ interface(COND_INTER) %{
+ equal(0x0, "eq");
+ greater(0x1, "gt");
+ overflow(0x2, "overflow");
+ less(0x3, "lt");
+ not_equal(0x4, "ne");
+ less_equal(0x5, "le");
+ no_overflow(0x6, "no_overflow");
+ greater_equal(0x7, "ge");
+ %}
+%}
+
+// used for unsigned integral comparisons
+operand cmpOpU()
+%{
+ match(Bool);
+
+ format %{ "" %}
+ // the values in interface derive from struct BoolTest::mask
+ interface(COND_INTER) %{
+ equal(0x0, "eq");
+ greater(0x1, "gtu");
+ overflow(0x2, "overflow");
+ less(0x3, "ltu");
+ not_equal(0x4, "ne");
+ less_equal(0x5, "leu");
+ no_overflow(0x6, "no_overflow");
+ greater_equal(0x7, "geu");
+ %}
+%}
+
+// used for certain integral comparisons which can be
+// converted to bxx instructions
+operand cmpOpEqNe()
+%{
+ match(Bool);
+ op_cost(0);
+ predicate(n->as_Bool()->_test._test == BoolTest::ne ||
+ n->as_Bool()->_test._test == BoolTest::eq);
+
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x0, "eq");
+ greater(0x1, "gt");
+ overflow(0x2, "overflow");
+ less(0x3, "lt");
+ not_equal(0x4, "ne");
+ less_equal(0x5, "le");
+ no_overflow(0x6, "no_overflow");
+ greater_equal(0x7, "ge");
+ %}
+%}
+
+operand cmpOpULtGe()
+%{
+ match(Bool);
+ op_cost(0);
+ predicate(n->as_Bool()->_test._test == BoolTest::lt ||
+ n->as_Bool()->_test._test == BoolTest::ge);
+
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x0, "eq");
+ greater(0x1, "gt");
+ overflow(0x2, "overflow");
+ less(0x3, "lt");
+ not_equal(0x4, "ne");
+ less_equal(0x5, "le");
+ no_overflow(0x6, "no_overflow");
+ greater_equal(0x7, "ge");
+ %}
+%}
+
+operand cmpOpUEqNeLeGt()
+%{
+ match(Bool);
+ op_cost(0);
+ predicate(n->as_Bool()->_test._test == BoolTest::ne ||
+ n->as_Bool()->_test._test == BoolTest::eq ||
+ n->as_Bool()->_test._test == BoolTest::le ||
+ n->as_Bool()->_test._test == BoolTest::gt);
+
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x0, "eq");
+ greater(0x1, "gt");
+ overflow(0x2, "overflow");
+ less(0x3, "lt");
+ not_equal(0x4, "ne");
+ less_equal(0x5, "le");
+ no_overflow(0x6, "no_overflow");
+ greater_equal(0x7, "ge");
+ %}
+%}
+
+
+// Flags register, used as output of compare logic
+operand rFlagsReg()
+%{
+ constraint(ALLOC_IN_RC(reg_flags));
+ match(RegFlags);
+
+ op_cost(0);
+ format %{ "RFLAGS" %}
+ interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Method Register
+operand inline_cache_RegP(iRegP reg)
+%{
+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
+ match(reg);
+ match(iRegPNoSp);
+ op_cost(0);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify
+// separate instructions for every form of operand when the
+// instruction accepts multiple operand types with the same basic
+// encoding and format. The classic case of this is memory operands.
+
+// memory is used to define the read/write location for load/store
+// instruction defs. We can turn a memory op into an Address.
+
+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN);
+
+// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
+// operations. It allows the src to be either an iRegI or a (ConvL2I
+// iRegL). In the latter case the l2i normally planted for a ConvL2I
+// can be elided because the 32-bit instruction will just employ the
+// lower 32 bits anyway.
+//
+// N.B. this does not elide all L2I conversions. If the truncated
+// value is consumed by more than one operation then the ConvL2I
+// cannot be bundled into the consuming nodes, so an l2i gets planted
+// (actually a mvw $dst $src) and the downstream instructions consume
+// the result of the l2i as an iRegI input. That's a shame since the
+// mvw is actually redundant, but it's not too costly.
+
+opclass iRegIorL2I(iRegI, iRegL2I);
+opclass iRegIorL(iRegI, iRegL);
+opclass iRegNorP(iRegN, iRegP);
+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP);
+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp);
+opclass immIorL(immI, immL);
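+// For example, because the load rules below take a "memory" operand, a single
+// instruct such as loadB covers indirect, indOffI, indOffL and their
+// narrow-oop variants instead of one rule per addressing form.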
+
+//----------PIPELINE-----------------------------------------------------------
+// Rules which define the behavior of the target architectures pipeline.
+
+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline
+//pipe_desc(ID, EX, MEM, WR);
+#define ID S0
+#define EX S1
+#define MEM S2
+#define WR S3
+
+// Integer ALU reg operation
+pipeline %{
+
+attributes %{
+ // RISC-V instructions are of fixed length
+ fixed_size_instructions; // Fixed size instructions TODO does
+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2
+ // RISC-V instructions come in 32-bit word units
+ instruction_unit_size = 4; // An instruction is 4 bytes long
+ instruction_fetch_unit_size = 64; // The processor fetches one line
+ instruction_fetch_units = 1; // of 64 bytes
+
+ // List of nop instructions
+ nops( MachNop );
+%}
+
+// We don't use an actual pipeline model, so we don't care about resources
+// or description. We do use pipeline classes to introduce fixed
+// latencies.
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine
+
+// Generic RISC-V pipeline
+// 1 decoder
+// 1 instruction decoded per cycle
+// 1 load/store ops per cycle, 1 branch, 1 FPU
+// 1 mul, 1 div
+
+resources ( DECODE,
+ ALU,
+ MUL,
+ DIV,
+ BRANCH,
+ LDST,
+ FPU);
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline
+
+// Define the pipeline as a generic 6 stage pipeline
+pipe_desc(S0, S1, S2, S3, S4, S5);
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
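+// Roughly, a pipe_class reads as follows: "src1 : S1(read)" means the operand
+// is read in stage S1, "dst : S5(write)" means the result becomes available
+// after stage S5, and "FPU : S5" reserves that functional unit in S5.
+// fixed_latency(n), used further down, overrides the stage-derived latency
+// with a flat n cycles.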
+
+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2)
+%{
+ single_instruction;
+ src1 : S1(read);
+ src2 : S2(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2)
+%{
+ src1 : S1(read);
+ src2 : S2(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_uop_s(fRegF dst, fRegF src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_uop_d(fRegD dst, fRegD src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_d2f(fRegF dst, fRegD src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_f2d(fRegD dst, fRegF src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_f2i(iRegINoSp dst, fRegF src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_f2l(iRegLNoSp dst, fRegF src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_i2f(fRegF dst, iRegIorL2I src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_l2f(fRegF dst, iRegL src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_d2i(iRegINoSp dst, fRegD src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_d2l(iRegLNoSp dst, fRegD src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_i2d(fRegD dst, iRegIorL2I src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_l2d(fRegD dst, iRegIorL2I src)
+%{
+ single_instruction;
+ src : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2)
+%{
+ single_instruction;
+ src1 : S1(read);
+ src2 : S2(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2)
+%{
+ single_instruction;
+ src1 : S1(read);
+ src2 : S2(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2)
+%{
+ single_instruction;
+ src1 : S1(read);
+ src2 : S2(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2)
+%{
+ single_instruction;
+ src1 : S1(read);
+ src2 : S2(read);
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_load_constant_s(fRegF dst)
+%{
+ single_instruction;
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_load_constant_d(fRegD dst)
+%{
+ single_instruction;
+ dst : S5(write);
+ DECODE : ID;
+ FPU : S5;
+%}
+
+pipe_class fp_load_mem_s(fRegF dst, memory mem)
+%{
+ single_instruction;
+ mem : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+pipe_class fp_load_mem_d(fRegD dst, memory mem)
+%{
+ single_instruction;
+ mem : S1(read);
+ dst : S5(write);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+pipe_class fp_store_reg_s(fRegF src, memory mem)
+%{
+ single_instruction;
+ src : S1(read);
+ mem : S5(write);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+pipe_class fp_store_reg_d(fRegD src, memory mem)
+%{
+ single_instruction;
+ src : S1(read);
+ mem : S5(write);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+//------- Integer ALU operations --------------------------
+
+// Integer ALU reg-reg operation
+// Operands needed in ID, result generated in EX
+// E.g. ADD Rd, Rs1, Rs2
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ dst : EX(write);
+ src1 : ID(read);
+ src2 : ID(read);
+ DECODE : ID;
+ ALU : EX;
+%}
+
+// Integer ALU reg operation with constant shift
+// E.g. SLLI Rd, Rs1, #shift
+pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
+%{
+ single_instruction;
+ dst : EX(write);
+ src1 : ID(read);
+ DECODE : ID;
+ ALU : EX;
+%}
+
+// Integer ALU reg-reg operation with variable shift
+// both operands must be available in ID
+// E.g. SLL Rd, Rs1, Rs2
+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ dst : EX(write);
+ src1 : ID(read);
+ src2 : ID(read);
+ DECODE : ID;
+ ALU : EX;
+%}
+
+// Integer ALU reg operation
+// E.g. NEG Rd, Rs2
+pipe_class ialu_reg(iRegI dst, iRegI src)
+%{
+ single_instruction;
+ dst : EX(write);
+ src : ID(read);
+ DECODE : ID;
+ ALU : EX;
+%}
+
+// Integer ALU reg immediate operation
+// E.g. ADDI Rd, Rs1, #imm
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
+%{
+ single_instruction;
+ dst : EX(write);
+ src1 : ID(read);
+ DECODE : ID;
+ ALU : EX;
+%}
+
+// Integer ALU immediate operation (no source operands)
+// E.g. LI Rd, #imm
+pipe_class ialu_imm(iRegI dst)
+%{
+ single_instruction;
+ dst : EX(write);
+ DECODE : ID;
+ ALU : EX;
+%}
+
+//------- Multiply pipeline operations --------------------
+
+// Multiply reg-reg
+// E.g. MULW Rd, Rs1, Rs2
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ dst : WR(write);
+ src1 : ID(read);
+ src2 : ID(read);
+ DECODE : ID;
+ MUL : WR;
+%}
+
+// E.g. MUL RD, Rs1, Rs2
+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ fixed_latency(3); // Maximum latency for 64 bit mul
+ dst : WR(write);
+ src1 : ID(read);
+ src2 : ID(read);
+ DECODE : ID;
+ MUL : WR;
+%}
+
+//------- Divide pipeline operations --------------------
+
+// E.g. DIVW Rd, Rs1, Rs2
+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ fixed_latency(8); // Maximum latency for 32 bit divide
+ dst : WR(write);
+ src1 : ID(read);
+ src2 : ID(read);
+ DECODE : ID;
+ DIV : WR;
+%}
+
+// E.g. DIV RD, Rs1, Rs2
+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ fixed_latency(16); // Maximum latency for 64 bit divide
+ dst : WR(write);
+ src1 : ID(read);
+ src2 : ID(read);
+ DECODE : ID;
+ DIV : WR;
+%}
+
+//------- Load pipeline operations ------------------------
+
+// Load - reg, mem
+// E.g. LA Rd, mem
+pipe_class iload_reg_mem(iRegI dst, memory mem)
+%{
+ single_instruction;
+ dst : WR(write);
+ mem : ID(read);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+// Load - reg, reg
+// E.g. LD Rd, Rs
+pipe_class iload_reg_reg(iRegI dst, iRegI src)
+%{
+ single_instruction;
+ dst : WR(write);
+ src : ID(read);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+//------- Store pipeline operations -----------------------
+
+// Store - zr, mem
+// E.g. SD zr, mem
+pipe_class istore_mem(memory mem)
+%{
+ single_instruction;
+ mem : ID(read);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+// Store - reg, mem
+// E.g. SD Rs, mem
+pipe_class istore_reg_mem(iRegI src, memory mem)
+%{
+ single_instruction;
+ mem : ID(read);
+ src : EX(read);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+// Store - reg, reg
+// E.g. SD Rs2, Rs1
+pipe_class istore_reg_reg(iRegI dst, iRegI src)
+%{
+ single_instruction;
+ dst : ID(read);
+ src : EX(read);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+//------- Branch pipeline operations ----------------------
+
+// Branch
+pipe_class pipe_branch()
+%{
+ single_instruction;
+ DECODE : ID;
+ BRANCH : EX;
+%}
+
+// Branch
+pipe_class pipe_branch_reg(iRegI src)
+%{
+ single_instruction;
+ src : ID(read);
+ DECODE : ID;
+ BRANCH : EX;
+%}
+
+// Compare & Branch
+// E.g. BEQ Rs1, Rs2, L
+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ src1 : ID(read);
+ src2 : ID(read);
+ DECODE : ID;
+ BRANCH : EX;
+%}
+
+// E.g. BEQZ Rs, L
+pipe_class pipe_cmpz_branch(iRegI src)
+%{
+ single_instruction;
+ src : ID(read);
+ DECODE : ID;
+ BRANCH : EX;
+%}
+
+//------- Synchronisation operations ----------------------
+// Any operation requiring serialization
+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release
+pipe_class pipe_serial()
+%{
+ single_instruction;
+ force_serialization;
+ fixed_latency(16);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+pipe_class pipe_slow()
+%{
+ instruction_count(10);
+ multiple_bundles;
+ force_serialization;
+ fixed_latency(16);
+ DECODE : ID;
+ LDST : MEM;
+%}
+
+// Empty pipeline class
+pipe_class pipe_class_empty()
+%{
+ single_instruction;
+ fixed_latency(0);
+%}
+
+// Default pipeline class.
+pipe_class pipe_class_default()
+%{
+ single_instruction;
+ fixed_latency(2);
+%}
+
+// Pipeline class for compares.
+pipe_class pipe_class_compare()
+%{
+ single_instruction;
+ fixed_latency(16);
+%}
+
+// Pipeline class for memory operations.
+pipe_class pipe_class_memory()
+%{
+ single_instruction;
+ fixed_latency(16);
+%}
+
+// Pipeline class for call.
+pipe_class pipe_class_call()
+%{
+ single_instruction;
+ fixed_latency(100);
+%}
+
+// Define the class for the Nop node.
+define %{
+ MachNop = pipe_class_empty;
+%}
+%}
+//----------INSTRUCTIONS-------------------------------------------------------
+//
+// match -- States which machine-independent subtree may be replaced
+// by this instruction.
+// ins_cost -- The estimated cost of this instruction is used by instruction
+// selection to identify a minimum cost tree of machine
+// instructions that matches a tree of machine-independent
+// instructions.
+// format -- A string providing the disassembly for this instruction.
+// The value of an instruction's operand may be inserted
+// by referring to it with a '$' prefix.
+// opcode -- Three instruction opcodes may be provided. These are referred
+// to within an encode class as $primary, $secondary, and $tertiary
+// respectively. The primary opcode is commonly used to
+// indicate the type of machine instruction, while secondary
+// and tertiary are often used for prefix options or addressing
+// modes.
+// ins_encode -- A list of encode classes with parameters. The encode class
+// name must have been defined in an 'enc_class' specification
+// in the encode section of the architecture description.
+
+// ============================================================================
+// Memory (Load/Store) Instructions
+
+// Load Instructions
+
+// Load Byte (8 bit signed)
+instruct loadB(iRegINoSp dst, memory mem)
+%{
+ match(Set dst (LoadB mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lb $dst, $mem\t# byte, #@loadB" %}
+
+ ins_encode %{
+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit signed) into long
+instruct loadB2L(iRegLNoSp dst, memory mem)
+%{
+ match(Set dst (ConvI2L (LoadB mem)));
+
+ ins_cost(LOAD_COST);
+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %}
+
+ ins_encode %{
+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned)
+instruct loadUB(iRegINoSp dst, memory mem)
+%{
+ match(Set dst (LoadUB mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %}
+
+ ins_encode %{
+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned) into long
+instruct loadUB2L(iRegLNoSp dst, memory mem)
+%{
+ match(Set dst (ConvI2L (LoadUB mem)));
+
+ ins_cost(LOAD_COST);
+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %}
+
+ ins_encode %{
+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed)
+instruct loadS(iRegINoSp dst, memory mem)
+%{
+ match(Set dst (LoadS mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lh $dst, $mem\t# short, #@loadS" %}
+
+ ins_encode %{
+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed) into long
+instruct loadS2L(iRegLNoSp dst, memory mem)
+%{
+ match(Set dst (ConvI2L (LoadS mem)));
+
+ ins_cost(LOAD_COST);
+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %}
+
+ ins_encode %{
+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Char (16 bit unsigned)
+instruct loadUS(iRegINoSp dst, memory mem)
+%{
+ match(Set dst (LoadUS mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %}
+
+ ins_encode %{
+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Short/Char (16 bit unsigned) into long
+instruct loadUS2L(iRegLNoSp dst, memory mem)
+%{
+ match(Set dst (ConvI2L (LoadUS mem)));
+
+ ins_cost(LOAD_COST);
+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %}
+
+ ins_encode %{
+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed)
+instruct loadI(iRegINoSp dst, memory mem)
+%{
+ match(Set dst (LoadI mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lw $dst, $mem\t# int, #@loadI" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed) into long
+instruct loadI2L(iRegLNoSp dst, memory mem)
+%{
+ match(Set dst (ConvI2L (LoadI mem)));
+
+ ins_cost(LOAD_COST);
+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit unsigned) into long
+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
+%{
+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+
+ ins_cost(LOAD_COST);
+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %}
+
+ ins_encode %{
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Long (64 bit signed)
+instruct loadL(iRegLNoSp dst, memory mem)
+%{
+ match(Set dst (LoadL mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "ld $dst, $mem\t# int, #@loadL" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Range
+instruct loadRange(iRegINoSp dst, memory mem)
+%{
+ match(Set dst (LoadRange mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %}
+
+ ins_encode %{
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Pointer
+instruct loadP(iRegPNoSp dst, memory mem)
+%{
+ match(Set dst (LoadP mem));
+ predicate(n->as_Load()->barrier_data() == 0);
+
+ ins_cost(LOAD_COST);
+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Compressed Pointer
+instruct loadN(iRegNNoSp dst, memory mem)
+%{
+ match(Set dst (LoadN mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %}
+
+ ins_encode %{
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Klass Pointer
+instruct loadKlass(iRegPNoSp dst, memory mem)
+%{
+ match(Set dst (LoadKlass mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Narrow Klass Pointer
+instruct loadNKlass(iRegNNoSp dst, memory mem)
+%{
+ match(Set dst (LoadNKlass mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %}
+
+ ins_encode %{
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(iload_reg_mem);
+%}
+
+// Load Float
+instruct loadF(fRegF dst, memory mem)
+%{
+ match(Set dst (LoadF mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "flw $dst, $mem\t# float, #@loadF" %}
+
+ ins_encode %{
+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(fp_load_mem_s);
+%}
+
+// Load Double
+instruct loadD(fRegD dst, memory mem)
+%{
+ match(Set dst (LoadD mem));
+
+ ins_cost(LOAD_COST);
+ format %{ "fld $dst, $mem\t# double, #@loadD" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(fp_load_mem_d);
+%}
+
+// Load Int Constant
+instruct loadConI(iRegINoSp dst, immI src)
+%{
+ match(Set dst src);
+
+ ins_cost(ALU_COST);
+ format %{ "li $dst, $src\t# int, #@loadConI" %}
+
+ ins_encode(riscv_enc_li_imm(dst, src));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Long Constant
+instruct loadConL(iRegLNoSp dst, immL src)
+%{
+ match(Set dst src);
+
+ ins_cost(ALU_COST);
+ format %{ "li $dst, $src\t# long, #@loadConL" %}
+
+ ins_encode(riscv_enc_li_imm(dst, src));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant
+instruct loadConP(iRegPNoSp dst, immP con)
+%{
+ match(Set dst con);
+
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %}
+
+ ins_encode(riscv_enc_mov_p(dst, con));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Null Pointer Constant
+instruct loadConP0(iRegPNoSp dst, immP0 con)
+%{
+ match(Set dst con);
+
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %}
+
+ ins_encode(riscv_enc_mov_zero(dst));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant One
+instruct loadConP1(iRegPNoSp dst, immP_1 con)
+%{
+ match(Set dst con);
+
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %}
+
+ ins_encode(riscv_enc_mov_p1(dst));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Byte Map Base Constant
+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
+%{
+ match(Set dst con);
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %}
+
+ ins_encode(riscv_enc_mov_byte_map_base(dst));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Pointer Constant
+instruct loadConN(iRegNNoSp dst, immN con)
+%{
+ match(Set dst con);
+
+ ins_cost(ALU_COST * 4);
+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %}
+
+ ins_encode(riscv_enc_mov_n(dst, con));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Null Pointer Constant
+instruct loadConN0(iRegNNoSp dst, immN0 con)
+%{
+ match(Set dst con);
+
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %}
+
+ ins_encode(riscv_enc_mov_zero(dst));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Klass Constant
+instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
+%{
+ match(Set dst con);
+
+ ins_cost(ALU_COST * 6);
+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %}
+
+ ins_encode(riscv_enc_mov_nk(dst, con));
+
+ ins_pipe(ialu_imm);
+%}
+
+// Load Float Constant
+instruct loadConF(fRegF dst, immF con) %{
+ match(Set dst con);
+
+ ins_cost(LOAD_COST);
+ format %{
+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF"
+ %}
+
+ ins_encode %{
+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con));
+ %}
+
+ ins_pipe(fp_load_constant_s);
+%}
+
+instruct loadConF0(fRegF dst, immF0 con) %{
+ match(Set dst con);
+
+ ins_cost(XFER_COST);
+
+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %}
+
+ ins_encode %{
+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr);
+ %}
+
+ ins_pipe(fp_load_constant_s);
+%}
+
+// Load Double Constant
+instruct loadConD(fRegD dst, immD con) %{
+ match(Set dst con);
+
+ ins_cost(LOAD_COST);
+ format %{
+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD"
+ %}
+
+ ins_encode %{
+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con));
+ %}
+
+ ins_pipe(fp_load_constant_d);
+%}
+
+instruct loadConD0(fRegD dst, immD0 con) %{
+ match(Set dst con);
+
+ ins_cost(XFER_COST);
+
+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %}
+
+ ins_encode %{
+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr);
+ %}
+
+ ins_pipe(fp_load_constant_d);
+%}
+
+// Store Instructions
+// Store CMS card-mark Immediate
+instruct storeimmCM0(immI0 zero, memory mem)
+%{
+ match(Set mem (StoreCM mem zero));
+
+ ins_cost(STORE_COST);
+ format %{ "storestore (elided)\n\t"
+ "sb zr, $mem\t# byte, #@storeimmCM0" %}
+
+ ins_encode %{
+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_mem);
+%}
+
+// Store CMS card-mark Immediate with intervening StoreStore
+// needed when using CMS with no conditional card marking
+instruct storeimmCM0_ordered(immI0 zero, memory mem)
+%{
+ match(Set mem (StoreCM mem zero));
+
+ ins_cost(ALU_COST + STORE_COST);
+ format %{ "membar(StoreStore)\n\t"
+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %}
+
+ ins_encode %{
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_mem);
+%}
+
+// Store Byte
+instruct storeB(iRegIorL2I src, memory mem)
+%{
+ match(Set mem (StoreB mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "sb $src, $mem\t# byte, #@storeB" %}
+
+ ins_encode %{
+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmB0(immI0 zero, memory mem)
+%{
+ match(Set mem (StoreB mem zero));
+
+ ins_cost(STORE_COST);
+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %}
+
+ ins_encode %{
+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_mem);
+%}
+
+// Store Char/Short
+instruct storeC(iRegIorL2I src, memory mem)
+%{
+ match(Set mem (StoreC mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "sh $src, $mem\t# short, #@storeC" %}
+
+ ins_encode %{
+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmC0(immI0 zero, memory mem)
+%{
+ match(Set mem (StoreC mem zero));
+
+ ins_cost(STORE_COST);
+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %}
+
+ ins_encode %{
+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_mem);
+%}
+
+// Store Integer
+instruct storeI(iRegIorL2I src, memory mem)
+%{
+  match(Set mem (StoreI mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "sw $src, $mem\t# int, #@storeI" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmI0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreI mem zero));
+
+ ins_cost(STORE_COST);
+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %}
+
+ ins_encode %{
+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_mem);
+%}
+
+// Store Long (64 bit signed)
+instruct storeL(iRegL src, memory mem)
+%{
+ match(Set mem (StoreL mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "sd $src, $mem\t# long, #@storeL" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+// Store Long Zero (64 bit signed)
+instruct storeimmL0(immL0 zero, memory mem)
+%{
+ match(Set mem (StoreL mem zero));
+
+ ins_cost(STORE_COST);
+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %}
+
+ ins_encode %{
+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_mem);
+%}
+
+// Store Pointer
+instruct storeP(iRegP src, memory mem)
+%{
+ match(Set mem (StoreP mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "sd $src, $mem\t# ptr, #@storeP" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+// Store Null Pointer
+instruct storeimmP0(immP0 zero, memory mem)
+%{
+ match(Set mem (StoreP mem zero));
+
+ ins_cost(STORE_COST);
+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %}
+
+ ins_encode %{
+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_mem);
+%}
+
+// Store Compressed Pointer
+instruct storeN(iRegN src, memory mem)
+%{
+ match(Set mem (StoreN mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
+%{
+ match(Set mem (StoreN mem zero));
+
+ ins_cost(STORE_COST);
+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %}
+
+ ins_encode %{
+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+// Store Float
+instruct storeF(fRegF src, memory mem)
+%{
+ match(Set mem (StoreF mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "fsw $src, $mem\t# float, #@storeF" %}
+
+ ins_encode %{
+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(fp_store_reg_s);
+%}
+
+// Store Double
+instruct storeD(fRegD src, memory mem)
+%{
+ match(Set mem (StoreD mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "fsd $src, $mem\t# double, #@storeD" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(fp_store_reg_d);
+%}
+
+// Store Compressed Klass Pointer
+instruct storeNKlass(iRegN src, memory mem)
+%{
+ match(Set mem (StoreNKlass mem src));
+
+ ins_cost(STORE_COST);
+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
+ %}
+
+ ins_pipe(istore_reg_mem);
+%}
+
+// ============================================================================
+// Atomic operation instructions
+//
+// Intel and SPARC both implement Ideal Node LoadPLocked and
+// Store{PIL}Conditional instructions using a normal load for the
+// LoadPLocked and a CAS for the Store{PIL}Conditional.
+//
+// The ideal code appears only to use LoadPLocked/storePConditional as a
+// pair to lock object allocations from Eden space when not using
+// TLABs.
+//
+// There does not appear to be a Load{IL}Locked Ideal Node and the
+// Ideal code appears to use Store{IL}Conditional as an alias for CAS
+// and to use StoreIConditional only for 32-bit and StoreLConditional
+// only for 64-bit.
+//
+// We implement LoadPLocked and storePConditional using, respectively, the
+// RISC-V hardware load-reserve (lr) and store-conditional (sc) instructions.
+// Each of Store{IL}Conditional, in contrast, is implemented as a CAS, which
+// itself is built from a pair of instructions comprising a load-reserve
+// followed by a store-conditional.
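+//
+// As a rough pseudocode sketch (illustrative only; the label and temporary
+// register names below are arbitrary, not the ones the encodings actually use):
+//
+//   // storePConditional: a bare store-conditional, paired with the lr.d
+//   // emitted earlier by loadPLocked
+//   la    t0, heap_top_addr
+//   sc.d  cr, newval, (t0)       // cr == 0 on success
+//
+//   // Store{IL}Conditional: a full CAS built from an lr/sc retry loop
+//   loop: lr.d  t1, (addr)
+//         bne   t1, oldval, done
+//         sc.d  t2, newval, (addr)
+//         bnez  t2, loop         // retry if the reservation was lost
+//   done: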
+
+
+// Locked-load (load reserved) of the current heap-top
+// used when updating the eden heap top
+// implemented using lr_d on RISCV64
+instruct loadPLocked(iRegPNoSp dst, indirect mem)
+%{
+ match(Set dst (LoadPLocked mem));
+
+ ins_cost(ALU_COST * 2 + LOAD_COST);
+
+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %}
+
+ ins_encode %{
+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp));
+ __ lr_d($dst$$Register, t0, Assembler::aq);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// implemented using sc_d on RISCV64.
+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
+%{
+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
+
+ ins_cost(ALU_COST * 2 + STORE_COST);
+
+ format %{
+    "sc_d t1, $newval, $heap_top_ptr\t# ptr store conditional, #@storePConditional"
+ %}
+
+ ins_encode %{
+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp));
+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
+%{
+ match(Set cr (StoreLConditional mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST);
+
+ format %{
+    "cmpxchg t1, $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval\n\t"
+    "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register);
+    __ xorr($cr$$Register, $cr$$Register, $oldval$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+// storeIConditional also has acquire semantics, for no better reason
+// than matching storeLConditional.
+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
+%{
+ match(Set cr (StoreIConditional mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2);
+
+ format %{
+    "cmpxchgw t1, $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval\n\t"
+    "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register);
+    __ xorr($cr$$Register, $cr$$Register, $oldval$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+// standard CompareAndSwapX when we are using barriers
+// these have higher priority than the rules selected by a predicate
+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapB mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
+
+ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+
+ format %{
+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapS mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
+
+ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+
+ format %{
+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval)
+%{
+ match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI"
+ %}
+
+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval)
+%{
+ match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL"
+ %}
+
+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
+
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP"
+ %}
+
+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval)
+%{
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN"
+ %}
+
+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+// alternative CompareAndSwapX when we are eliding barriers
+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndSwapB mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndSwapS mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq"
+ %}
+
+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq"
+ %}
+
+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));
+
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq"
+ %}
+
+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
+
+ format %{
+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq"
+ %}
+
+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval));
+
+ ins_pipe(pipe_slow);
+%}
+
+// Sundry CAS operations. Note that release is always true,
+// regardless of the memory ordering of the CAS. This is because we
+// need the volatile case to be sequentially consistent but there is
+// no trailing StoreLoad barrier emitted by C2. Unfortunately we
+// can't check the type of memory ordering here, so we always emit a
+// sc_d(w) with rl bit set.
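+//
+// Rough shape of the resulting lr/sc pair and its ordering bits (illustrative
+// only; the actual retry loop lives inside the cmpxchg helpers invoked below):
+//   lr.w.aq  old, (addr)          // aq only for the *Acq variants
+//   ...compare old with the expected value...
+//   sc.w.rl  fail, newv, (addr)   // rl always set, as noted above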
+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+ match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+ match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));
+
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+ effect(TEMP_DEF res);
+
+ format %{
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB"
+ %}
+
+ ins_encode %{
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS"
+ %}
+
+ ins_encode %{
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+ format %{
+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+ format %{
+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
+
+ format %{
+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+ format %{
+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq"
+ %}
+
+ ins_encode %{
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
+
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+ format %{
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq"
+ %}
+
+ ins_encode %{
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+ format %{
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+ format %{
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
+
+ format %{
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));
+
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+ format %{
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq"
+ %}
+
+ ins_encode %{
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ __ xori($res$$Register, $res$$Register, 1);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev)
+%{
+ match(Set prev (GetAndSetI mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %}
+
+ ins_encode %{
+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev)
+%{
+ match(Set prev (GetAndSetL mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %}
+
+ ins_encode %{
+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev)
+%{
+ match(Set prev (GetAndSetN mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %}
+
+ ins_encode %{
+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev)
+%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
+ match(Set prev (GetAndSetP mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %}
+
+ ins_encode %{
+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set prev (GetAndSetI mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %}
+
+ ins_encode %{
+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set prev (GetAndSetL mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %}
+
+ ins_encode %{
+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set prev (GetAndSetN mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %}
+
+ ins_encode %{
+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev)
+%{
+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));
+
+ match(Set prev (GetAndSetP mem newv));
+
+ ins_cost(ALU_COST);
+
+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %}
+
+ ins_encode %{
+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr)
+%{
+ match(Set newval (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %}
+
+ ins_encode %{
+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used());
+
+ match(Set dummy (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %}
+
+ ins_encode %{
+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr)
+%{
+ match(Set newval (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %}
+
+ ins_encode %{
+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used());
+
+ match(Set dummy (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %}
+
+ ins_encode %{
+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr)
+%{
+ match(Set newval (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %}
+
+ ins_encode %{
+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used());
+
+ match(Set dummy (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %}
+
+ ins_encode %{
+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr)
+%{
+ match(Set newval (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %}
+
+ ins_encode %{
+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used());
+
+ match(Set dummy (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %}
+
+ ins_encode %{
+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set newval (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %}
+
+ ins_encode %{
+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+
+ match(Set dummy (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %}
+
+ ins_encode %{
+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set newval (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %}
+
+ ins_encode %{
+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+
+ match(Set dummy (GetAndAddL mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %}
+
+ ins_encode %{
+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set newval (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %}
+
+ ins_encode %{
+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+
+ match(Set dummy (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %}
+
+ ins_encode %{
+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr)
+%{
+ predicate(needs_acquiring_load_reserved(n));
+
+ match(Set newval (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %}
+
+ ins_encode %{
+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr)
+%{
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+
+ match(Set dummy (GetAndAddI mem incr));
+
+ ins_cost(ALU_COST);
+
+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %}
+
+ ins_encode %{
+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Arithmetic Instructions
+//
+
+// Integer Addition
+
+// TODO
+// These currently employ operations which do not set CR and hence are
+// not flagged as killing CR, but we would like to isolate the cases
+// where we want to set flags from those where we don't. We need to work
+// out how to do that.
+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (AddI src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ addw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{
+ match(Set dst (AddI src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+    __ addiw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             $src2$$constant);
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{
+ match(Set dst (AddI (ConvL2I src1) src2));
+
+ ins_cost(ALU_COST);
+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ addiw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ $src2$$constant);
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Pointer Addition
+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
+ match(Set dst (AddP src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ add(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// If we shift more than 32 bits, we need not convert I2L.
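+// For example (illustrative): with scale == 32, every bit of ConvI2L(src) at
+// position 32 or above is shifted past bit 63, so only the low 32 bits of src
+// can reach the result; "slli dst, src, 32" therefore produces the same value
+// whether or not the upper half of the source register holds a sign extension.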
+instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{
+ match(Set dst (LShiftL (ConvI2L src) scale));
+ ins_cost(ALU_COST);
+ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Pointer Immediate Addition
+// n.b. this needs to be more expensive than using an indirect memory
+// operand
+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{
+ match(Set dst (AddP src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // src2 is imm, so actually call the addi
+ __ add(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ $src2$$constant);
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (AddL src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ add(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// No constant pool entries required. Long Immediate Addition.
+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+ match(Set dst (AddL src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // src2 is imm, so actually call the addi
+ __ add(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ $src2$$constant);
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Subtraction
+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (SubI src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ subw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{
+ match(Set dst (SubI src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // src2 is imm, so actually call the addiw
+ __ subw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ $src2$$constant);
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Long Subtraction
+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (SubL src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sub(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// No constant pool entries required. Long Immediate Subtraction.
+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{
+ match(Set dst (SubL src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // src2 is imm, so actually call the addi
+ __ sub(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ $src2$$constant);
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Negation (special case for sub)
+
+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
+ match(Set dst (SubI zero src));
+ ins_cost(ALU_COST);
+ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %}
+
+ ins_encode %{
+    // negw expands to subw dst, zr, src
+ __ negw(as_Register($dst$$reg),
+ as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Long Negation
+
+instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{
+ match(Set dst (SubL zero src));
+ ins_cost(ALU_COST);
+ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %}
+
+ ins_encode %{
+    // neg expands to sub dst, zr, src
+ __ neg(as_Register($dst$$reg),
+ as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Integer Multiply
+
+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (MulI src1 src2));
+ ins_cost(IMUL_COST);
+ format %{ "mulw $dst, $src1, $src2\t#@mulI" %}
+
+  // 32-bit multiply; only the low 32 bits of each source contribute to the result
+  ins_encode %{
+    // on riscv64, mulw sign-extends the 32-bit product into the full 64-bit dst
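+    // e.g. (illustrative) 0x7fffffff * 2 has low word 0xfffffffe, so dst ends
+    // up holding 0xfffffffffffffffe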
+ __ mulw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(imul_reg_reg);
+%}
+
+// Long Multiply
+
+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (MulL src1 src2));
+ ins_cost(IMUL_COST);
+ format %{ "mul $dst, $src1, $src2\t#@mulL" %}
+
+ ins_encode %{
+ __ mul(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(lmul_reg_reg);
+%}
+
+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2)
+%{
+ match(Set dst (MulHiL src1 src2));
+ ins_cost(IMUL_COST);
+ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %}
+
+ ins_encode %{
+ __ mulh(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(lmul_reg_reg);
+%}
+
+// Integer Divide
+
+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (DivI src1 src2));
+ ins_cost(IDIVSI_COST);
+ format %{ "divw $dst, $src1, $src2\t#@divI"%}
+
+ ins_encode(riscv_enc_divw(dst, src1, src2));
+ ins_pipe(idiv_reg_reg);
+%}
+
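+// signExtract matches (src >> 31) >>> 31, i.e. extracting the sign bit of an int.
+// The arithmetic shift replicates the sign bit into every position and the logical
+// shift then keeps only that bit, so a single srliw by 31 suffices:
+// e.g. src = -7 -> 0xffffffff -> 1, src = 7 -> 0x00000000 -> 0.
+// The div1/div2 operand names hint that this shape typically comes from C2's
+// lowering of signed division.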
+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
+ match(Set dst (URShiftI (RShiftI src1 div1) div2));
+ ins_cost(ALU_COST);
+ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %}
+
+ ins_encode %{
+ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
+ %}
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Long Divide
+
+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (DivL src1 src2));
+ ins_cost(IDIVDI_COST);
+ format %{ "div $dst, $src1, $src2\t#@divL" %}
+
+ ins_encode(riscv_enc_div(dst, src1, src2));
+ ins_pipe(ldiv_reg_reg);
+%}
+
+instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
+ match(Set dst (URShiftL (RShiftL src1 div1) div2));
+ ins_cost(ALU_COST);
+ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63);
+ %}
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Integer Remainder
+
+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (ModI src1 src2));
+ ins_cost(IDIVSI_COST);
+ format %{ "remw $dst, $src1, $src2\t#@modI" %}
+
+ ins_encode(riscv_enc_modw(dst, src1, src2));
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Long Remainder
+
+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (ModL src1 src2));
+ ins_cost(IDIVDI_COST);
+ format %{ "rem $dst, $src1, $src2\t#@modL" %}
+
+ ins_encode(riscv_enc_mod(dst, src1, src2));
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Integer Shifts
+
+// Shift Left Register
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (LShiftI src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %}
+
+ ins_encode %{
+ __ sllw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+ match(Set dst (LShiftI src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %}
+
+ ins_encode %{
+ // only the lower 5 bits of the immediate
+ // encode the shift amount for slliw
+ __ slliw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (unsigned) $src2$$constant & 0x1f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (URShiftI src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %}
+
+ ins_encode %{
+ __ srlw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+ match(Set dst (URShiftI src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %}
+
+ ins_encode %{
+ // only the lower 5 bits of the immediate
+ // encode the shift amount for srliw
+ __ srliw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (unsigned) $src2$$constant & 0x1f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+ match(Set dst (RShiftI src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %}
+
+ ins_encode %{
+ // sraw sign-extends the 32-bit result into the high 32 bits of dst
+ __ sraw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+ match(Set dst (RShiftI src1 src2));
+ ins_cost(ALU_COST);
+ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %}
+
+ ins_encode %{
+ // sraiw sign-extends the 32-bit result into the high 32 bits of dst
+ __ sraiw(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (unsigned) $src2$$constant & 0x1f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Long Shifts
+
+// Shift Left Register
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+ match(Set dst (LShiftL src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %}
+
+ ins_encode %{
+ __ sll(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+ match(Set dst (LShiftL src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // the shift amount is encoded in the lower
+ // 6 bits of the I-immediate field for RV64I
+ __ slli(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (unsigned) $src2$$constant & 0x3f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+ match(Set dst (URShiftL src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %}
+
+ ins_encode %{
+ __ srl(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+ match(Set dst (URShiftL src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // the shift amount is encoded in the lower
+ // 6 bits of the I-immediate field for RV64I
+ __ srli(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (unsigned) $src2$$constant & 0x3f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// A special-case pattern for card table stores.
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
+ match(Set dst (URShiftL (CastP2X src1) src2));
+
+ ins_cost(ALU_COST);
+ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // the shift amount is encoded in the lower
+ // 6 bits of the I-immediate field for RV64I
+ __ srli(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (unsigned) $src2$$constant & 0x3f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+ match(Set dst (RShiftL src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %}
+
+ ins_encode %{
+ __ sra(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+ match(Set dst (RShiftL src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ // the shift amount is encoded in the lower
+ // 6 bits of the I-immediate field for RV64I
+ __ srai(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (unsigned) $src2$$constant & 0x3f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{
+ match(Set dst (XorI src1 m1));
+ ins_cost(ALU_COST);
+ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %}
+
+ ins_encode %{
+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{
+ match(Set dst (XorL src1 m1));
+ ins_cost(ALU_COST);
+ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %}
+
+ ins_encode %{
+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+
+// ============================================================================
+// Floating Point Arithmetic Instructions
+
+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+ match(Set dst (AddF src1 src2));
+
+ ins_cost(FMUL_SINGLE_COST);
+ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %}
+
+ ins_encode %{
+ __ fadd_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_dop_reg_reg_s);
+%}
+
+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+ match(Set dst (AddD src1 src2));
+
+ ins_cost(FMUL_DOUBLE_COST);
+ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %}
+
+ ins_encode %{
+ __ fadd_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_dop_reg_reg_d);
+%}
+
+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+ match(Set dst (SubF src1 src2));
+
+ ins_cost(FMUL_SINGLE_COST);
+ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %}
+
+ ins_encode %{
+ __ fsub_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_dop_reg_reg_s);
+%}
+
+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+ match(Set dst (SubD src1 src2));
+
+ ins_cost(FMUL_DOUBLE_COST);
+ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %}
+
+ ins_encode %{
+ __ fsub_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_dop_reg_reg_d);
+%}
+
+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+ match(Set dst (MulF src1 src2));
+
+ ins_cost(FMUL_SINGLE_COST);
+ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %}
+
+ ins_encode %{
+ __ fmul_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_dop_reg_reg_s);
+%}
+
+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+ match(Set dst (MulD src1 src2));
+
+ ins_cost(FMUL_DOUBLE_COST);
+ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %}
+
+ ins_encode %{
+ __ fmul_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// src1 * src2 + src3
+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF src3 (Binary src1 src2)));
+
+ ins_cost(FMUL_SINGLE_COST);
+ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %}
+
+ ins_encode %{
+ __ fmadd_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 + src3
+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD src3 (Binary src1 src2)));
+
+ ins_cost(FMUL_DOUBLE_COST);
+ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %}
+
+ ins_encode %{
+ __ fmadd_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 - src3
+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
+
+ ins_cost(FMUL_SINGLE_COST);
+ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %}
+
+ ins_encode %{
+ __ fmsub_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 - src3
+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
+
+ ins_cost(FMUL_DOUBLE_COST);
+ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %}
+
+ ins_encode %{
+ __ fmsub_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 + src3
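+// Two match rules below because negating either factor negates the product:
+// (-a) * b + c and a * (-b) + c are the same expression, and both map to fnmsub.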
+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
+ match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
+
+ ins_cost(FMUL_SINGLE_COST);
+ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %}
+
+ ins_encode %{
+ __ fnmsub_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 + src3
+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
+ match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
+
+ ins_cost(FMUL_DOUBLE_COST);
+ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %}
+
+ ins_encode %{
+ __ fnmsub_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 - src3
+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
+ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
+
+ ins_cost(FMUL_SINGLE_COST);
+ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %}
+
+ ins_encode %{
+ __ fnmadd_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 - src3
+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
+ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
+
+ ins_cost(FMUL_DOUBLE_COST);
+ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %}
+
+ ins_encode %{
+ __ fnmadd_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg),
+ as_FloatRegister($src3$$reg));
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Math.max(FF)F
+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+ match(Set dst (MaxF src1 src2));
+ effect(TEMP_DEF dst);
+
+ format %{ "maxF $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ minmax_FD(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+ false /* is_double */, false /* is_min */);
+ %}
+
+ ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Math.min(FF)F
+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+ match(Set dst (MinF src1 src2));
+ effect(TEMP_DEF dst);
+
+ format %{ "minF $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ minmax_FD(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+ false /* is_double */, true /* is_min */);
+ %}
+
+ ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Math.max(DD)D
+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+ match(Set dst (MaxD src1 src2));
+ effect(TEMP_DEF dst);
+
+ format %{ "maxD $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ minmax_FD(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+ true /* is_double */, false /* is_min */);
+ %}
+
+ ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Math.min(DD)D
+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+ match(Set dst (MinD src1 src2));
+ effect(TEMP_DEF dst);
+
+ format %{ "minD $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ minmax_FD(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+ true /* is_double */, true /* is_min */);
+ %}
+
+ ins_pipe(fp_dop_reg_reg_d);
+%}
+
+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+ match(Set dst (DivF src1 src2));
+
+ ins_cost(FDIV_COST);
+ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %}
+
+ ins_encode %{
+ __ fdiv_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_div_s);
+%}
+
+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+ match(Set dst (DivD src1 src2));
+
+ ins_cost(FDIV_COST);
+ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %}
+
+ ins_encode %{
+ __ fdiv_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+
+ ins_pipe(fp_div_d);
+%}
+
+instruct negF_reg_reg(fRegF dst, fRegF src) %{
+ match(Set dst (NegF src));
+
+ ins_cost(XFER_COST);
+ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %}
+
+ ins_encode %{
+ __ fneg_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_uop_s);
+%}
+
+instruct negD_reg_reg(fRegD dst, fRegD src) %{
+ match(Set dst (NegD src));
+
+ ins_cost(XFER_COST);
+ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %}
+
+ ins_encode %{
+ __ fneg_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_uop_d);
+%}
+
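+// Branchless abs: with mask = src >> 31 (all ones for negative src, zero otherwise),
+// (src + mask) ^ mask yields |src|, e.g. src = -5: mask = -1, (-5 + -1) ^ -1 = -6 ^ -1 = 5.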
+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{
+ match(Set dst (AbsI src));
+
+ ins_cost(ALU_COST * 3);
+ format %{
+ "sraiw t0, $src, 0x1f\n\t"
+ "addw $dst, $src, t0\n\t"
+ "xorr $dst, $dst, t0\t#@absI_reg"
+ %}
+
+ ins_encode %{
+ __ sraiw(t0, as_Register($src$$reg), 0x1f);
+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0);
+ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct absL_reg(iRegLNoSp dst, iRegL src) %{
+ match(Set dst (AbsL src));
+
+ ins_cost(ALU_COST * 3);
+ format %{
+ "srai t0, $src, 0x3f\n\t"
+ "add $dst, $src, t0\n\t"
+ "xorr $dst, $dst, t0\t#@absL_reg"
+ %}
+
+ ins_encode %{
+ __ srai(t0, as_Register($src$$reg), 0x3f);
+ __ add(as_Register($dst$$reg), as_Register($src$$reg), t0);
+ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct absF_reg(fRegF dst, fRegF src) %{
+ match(Set dst (AbsF src));
+
+ ins_cost(XFER_COST);
+ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %}
+ ins_encode %{
+ __ fabs_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_uop_s);
+%}
+
+instruct absD_reg(fRegD dst, fRegD src) %{
+ match(Set dst (AbsD src));
+
+ ins_cost(XFER_COST);
+ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %}
+ ins_encode %{
+ __ fabs_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_uop_d);
+%}
+
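+// Java only has a double sqrt, so a float sqrt reaches the matcher as
+// ConvD2F(SqrtD(ConvF2D(src))). Computing it directly with fsqrt.s is safe:
+// rounding the exact double sqrt of a float back to float gives the same
+// correctly rounded result as a single-precision sqrt.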
+instruct sqrtF_reg(fRegF dst, fRegF src) %{
+ match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+
+ ins_cost(FSQRT_COST);
+ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %}
+ ins_encode %{
+ __ fsqrt_s(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_sqrt_s);
+%}
+
+instruct sqrtD_reg(fRegD dst, fRegD src) %{
+ match(Set dst (SqrtD src));
+
+ ins_cost(FSQRT_COST);
+ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %}
+ ins_encode %{
+ __ fsqrt_d(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_sqrt_d);
+%}
+
+// Arithmetic Instructions End
+
+// ============================================================================
+// Logical Instructions
+
+// Register And
+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+ match(Set dst (AndI src1 src2));
+
+ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ andr(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate And
+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+ match(Set dst (AndI src1 src2));
+
+ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ andi(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (int32_t)($src2$$constant));
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Register Or
+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+ match(Set dst (OrI src1 src2));
+
+ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ orr(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or
+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+ match(Set dst (OrI src1 src2));
+
+ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ ori(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (int32_t)($src2$$constant));
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor
+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+ match(Set dst (XorI src1 src2));
+
+ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ xorr(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor
+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+ match(Set dst (XorI src1 src2));
+
+ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ xori(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (int32_t)($src2$$constant));
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Register And Long
+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (AndL src1 src2));
+
+ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ andr(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate And Long
+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+ match(Set dst (AndL src1 src2));
+
+ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ andi(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (int32_t)($src2$$constant));
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Register Or Long
+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (OrL src1 src2));
+
+ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ orr(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or Long
+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+ match(Set dst (OrL src1 src2));
+
+ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ ori(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (int32_t)($src2$$constant));
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor Long
+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+ match(Set dst (XorL src1 src2));
+
+ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ xorr(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor Long
+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+ match(Set dst (XorL src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %}
+
+ ins_encode %{
+ __ xori(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ (int32_t)($src2$$constant));
+ %}
+
+ ins_pipe(ialu_reg_imm);
+%}
+
+// ============================================================================
+// BSWAP Instructions
+
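+// revb_w_w, revb, revb_h_h_u and revb_h_h are MacroAssembler routines rather than
+// single instructions; the ALU_COST multipliers below approximate the length of
+// their shift/mask/or expansions.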
+instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{
+ match(Set dst (ReverseBytesI src));
+ effect(TEMP cr);
+
+ ins_cost(ALU_COST * 13);
+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %}
+
+ ins_encode %{
+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{
+ match(Set dst (ReverseBytesL src));
+ effect(TEMP cr);
+
+ ins_cost(ALU_COST * 29);
+ format %{ "revb $dst, $src\t#@bytes_reverse_long" %}
+
+ ins_encode %{
+ __ revb(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
+ match(Set dst (ReverseBytesUS src));
+
+ ins_cost(ALU_COST * 5);
+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %}
+
+ ins_encode %{
+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
+ match(Set dst (ReverseBytesS src));
+
+ ins_cost(ALU_COST * 5);
+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %}
+
+ ins_encode %{
+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// ============================================================================
+// MemBar Instruction
+
+instruct load_fence() %{
+ match(LoadFence);
+ ins_cost(ALU_COST);
+
+ format %{ "#@load_fence" %}
+
+ ins_encode %{
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct membar_acquire() %{
+ match(MemBarAcquire);
+ ins_cost(ALU_COST);
+
+ format %{ "#@membar_acquire\n\t"
+ "fence ir iorw" %}
+
+ ins_encode %{
+ __ block_comment("membar_acquire");
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct membar_acquire_lock() %{
+ match(MemBarAcquireLock);
+ ins_cost(0);
+
+ format %{ "#@membar_acquire_lock (elided)" %}
+
+ ins_encode %{
+ __ block_comment("membar_acquire_lock (elided)");
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct store_fence() %{
+ match(StoreFence);
+ ins_cost(ALU_COST);
+
+ format %{ "#@store_fence" %}
+
+ ins_encode %{
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct membar_release() %{
+ match(MemBarRelease);
+ ins_cost(ALU_COST);
+
+ format %{ "#@membar_release\n\t"
+ "fence iorw ow" %}
+
+ ins_encode %{
+ __ block_comment("membar_release");
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ match(StoreStoreFence);
+ ins_cost(ALU_COST);
+
+ format %{ "MEMBAR-store-store\t#@membar_storestore" %}
+
+ ins_encode %{
+ __ membar(MacroAssembler::StoreStore);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct membar_release_lock() %{
+ match(MemBarReleaseLock);
+ ins_cost(0);
+
+ format %{ "#@membar_release_lock (elided)" %}
+
+ ins_encode %{
+ __ block_comment("membar_release_lock (elided)");
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+instruct membar_volatile() %{
+ match(MemBarVolatile);
+ ins_cost(ALU_COST);
+
+ format %{ "#@membar_volatile\n\t"
+ "fence iorw iorw"%}
+
+ ins_encode %{
+ __ block_comment("membar_volatile");
+ __ membar(MacroAssembler::StoreLoad);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Cast Instructions (Java-level type cast)
+
+instruct castX2P(iRegPNoSp dst, iRegL src) %{
+ match(Set dst (CastX2P src));
+
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ if ($dst$$reg != $src$$reg) {
+ __ mv(as_Register($dst$$reg), as_Register($src$$reg));
+ }
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct castP2X(iRegLNoSp dst, iRegP src) %{
+ match(Set dst (CastP2X src));
+
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ if ($dst$$reg != $src$$reg) {
+ __ mv(as_Register($dst$$reg), as_Register($src$$reg));
+ }
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct castPP(iRegPNoSp dst)
+%{
+ match(Set dst (CastPP dst));
+ ins_cost(0);
+
+ size(0);
+ format %{ "# castPP of $dst, #@castPP" %}
+ ins_encode(/* empty encoding */);
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct castLL(iRegL dst)
+%{
+ match(Set dst (CastLL dst));
+
+ size(0);
+ format %{ "# castLL of $dst, #@castLL" %}
+ ins_encode(/* empty encoding */);
+ ins_cost(0);
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct castII(iRegI dst)
+%{
+ match(Set dst (CastII dst));
+
+ size(0);
+ format %{ "# castII of $dst, #@castII" %}
+ ins_encode(/* empty encoding */);
+ ins_cost(0);
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct checkCastPP(iRegPNoSp dst)
+%{
+ match(Set dst (CheckCastPP dst));
+
+ size(0);
+ ins_cost(0);
+ format %{ "# checkcastPP of $dst, #@checkCastPP" %}
+ ins_encode(/* empty encoding */);
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct castFF(fRegF dst)
+%{
+ match(Set dst (CastFF dst));
+
+ size(0);
+ format %{ "# castFF of $dst" %}
+ ins_encode(/* empty encoding */);
+ ins_cost(0);
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct castDD(fRegD dst)
+%{
+ match(Set dst (CastDD dst));
+
+ size(0);
+ format %{ "# castDD of $dst" %}
+ ins_encode(/* empty encoding */);
+ ins_cost(0);
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct castVV(vReg dst)
+%{
+ match(Set dst (CastVV dst));
+
+ size(0);
+ format %{ "# castVV of $dst" %}
+ ins_encode(/* empty encoding */);
+ ins_cost(0);
+ ins_pipe(pipe_class_empty);
+%}
+
+// ============================================================================
+// Convert Instructions
+
+// int to bool
+instruct convI2Bool(iRegINoSp dst, iRegI src)
+%{
+ match(Set dst (Conv2B src));
+
+ ins_cost(ALU_COST);
+ format %{ "snez $dst, $src\t#@convI2Bool" %}
+
+ ins_encode %{
+ __ snez(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// pointer to bool
+instruct convP2Bool(iRegINoSp dst, iRegP src)
+%{
+ match(Set dst (Conv2B src));
+
+ ins_cost(ALU_COST);
+ format %{ "snez $dst, $src\t#@convP2Bool" %}
+
+ ins_encode %{
+ __ snez(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// int <-> long
+
+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
+%{
+ match(Set dst (ConvI2L src));
+
+ ins_cost(ALU_COST);
+ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %}
+ ins_encode %{
+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
+ match(Set dst (ConvL2I src));
+
+ ins_cost(ALU_COST);
+ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %}
+
+ ins_encode %{
+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// int to unsigned long (Zero-extend)
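+// Masking the sign-extended value with 0xFFFFFFFF (immL_32bits) is the canonical
+// int-to-unsigned-long idiom, so it collapses to a single 32-bit zero extension.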
+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
+%{
+ match(Set dst (AndL (ConvI2L src) mask));
+
+ ins_cost(ALU_COST * 2);
+ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// float <-> double
+
+instruct convF2D_reg(fRegD dst, fRegF src) %{
+ match(Set dst (ConvF2D src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %}
+
+ ins_encode %{
+ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_f2d);
+%}
+
+instruct convD2F_reg(fRegF dst, fRegD src) %{
+ match(Set dst (ConvD2F src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %}
+
+ ins_encode %{
+ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_d2f);
+%}
+
+// float <-> int
+
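+// The *_safe conversion helpers below additionally enforce the Java-specified
+// result of 0 for NaN inputs; a bare fcvt.* returns the maximum integer for NaN.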
+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{
+ match(Set dst (ConvF2I src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister);
+ %}
+
+ ins_pipe(fp_f2i);
+%}
+
+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{
+ match(Set dst (ConvI2F src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(fp_i2f);
+%}
+
+// float <-> long
+
+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{
+ match(Set dst (ConvF2L src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister);
+ %}
+
+ ins_pipe(fp_f2l);
+%}
+
+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{
+ match(Set dst (ConvL2F src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(fp_l2f);
+%}
+
+// double <-> int
+
+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{
+ match(Set dst (ConvD2I src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister);
+ %}
+
+ ins_pipe(fp_d2i);
+%}
+
+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{
+ match(Set dst (ConvI2D src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(fp_i2d);
+%}
+
+// double <-> long
+
+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
+ match(Set dst (ConvD2L src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister);
+ %}
+
+ ins_pipe(fp_d2l);
+%}
+
+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{
+ match(Set dst (ConvL2D src));
+
+ ins_cost(XFER_COST);
+ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %}
+
+ ins_encode %{
+ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(fp_l2d);
+%}
+
+// Convert oop into int for vectors alignment masking
+instruct convP2I(iRegINoSp dst, iRegP src) %{
+ match(Set dst (ConvL2I (CastP2X src)));
+
+ ins_cost(ALU_COST * 2);
+ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ zero_extend($dst$$Register, $src$$Register, 32);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(iRegINoSp dst, iRegN src)
+%{
+ predicate(CompressedOops::shift() == 0);
+ match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+ ins_cost(ALU_COST);
+ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ mv($dst$$Register, $src$$Register);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Convert oop pointer into compressed form
+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
+ match(Set dst (EncodeP src));
+ ins_cost(ALU_COST);
+ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %}
+ ins_encode %{
+ Register s = $src$$Register;
+ Register d = $dst$$Register;
+ __ encode_heap_oop(d, s);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{
+ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
+ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
+ match(Set dst (DecodeN src));
+
+ ins_cost(0);
+ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %}
+ ins_encode %{
+ Register s = $src$$Register;
+ Register d = $dst$$Register;
+ __ decode_heap_oop(d, s);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{
+ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
+ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
+ match(Set dst (DecodeN src));
+
+ ins_cost(0);
+ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %}
+ ins_encode %{
+ Register s = $src$$Register;
+ Register d = $dst$$Register;
+ __ decode_heap_oop_not_null(d, s);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Convert klass pointer into compressed form.
+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
+ match(Set dst (EncodePKlass src));
+
+ ins_cost(ALU_COST);
+ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %}
+
+ ins_encode %{
+ Register src_reg = as_Register($src$$reg);
+ Register dst_reg = as_Register($dst$$reg);
+ __ encode_klass_not_null(dst_reg, src_reg, t0);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{
+ match(Set dst (DecodeNKlass src));
+
+ effect(TEMP tmp);
+
+ ins_cost(ALU_COST);
+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %}
+
+ ins_encode %{
+ Register src_reg = as_Register($src$$reg);
+ Register dst_reg = as_Register($dst$$reg);
+ Register tmp_reg = as_Register($tmp$$reg);
+ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// stack <-> reg and reg <-> reg shuffles with no conversion
+
+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
+
+ match(Set dst (MoveF2I src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(LOAD_COST);
+
+ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp));
+ %}
+
+ ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{
+
+ match(Set dst (MoveI2F src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(LOAD_COST);
+
+ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %}
+
+ ins_encode %{
+ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+ %}
+
+ ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
+
+ match(Set dst (MoveD2L src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(LOAD_COST);
+
+ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp));
+ %}
+
+ ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{
+
+ match(Set dst (MoveL2D src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(LOAD_COST);
+
+ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+ %}
+
+ ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{
+
+ match(Set dst (MoveF2I src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(STORE_COST);
+
+ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %}
+
+ ins_encode %{
+ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+ %}
+
+ ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+
+ match(Set dst (MoveI2F src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(STORE_COST);
+
+ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp));
+ %}
+
+ ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{
+
+ match(Set dst (MoveD2L src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(STORE_COST);
+
+ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+ %}
+
+ ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+
+ match(Set dst (MoveL2D src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(STORE_COST);
+
+ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp));
+ %}
+
+ ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{
+
+ match(Set dst (MoveF2I src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(XFER_COST);
+
+ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %}
+
+ ins_encode %{
+ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_f2i);
+
+%}
+
+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{
+
+ match(Set dst (MoveI2F src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(XFER_COST);
+
+ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %}
+
+ ins_encode %{
+ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(fp_i2f);
+
+%}
+
+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
+
+ match(Set dst (MoveD2L src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(XFER_COST);
+
+ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %}
+
+ ins_encode %{
+ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+
+ ins_pipe(fp_d2l);
+
+%}
+
+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{
+
+ match(Set dst (MoveL2D src));
+
+ effect(DEF dst, USE src);
+
+ ins_cost(XFER_COST);
+
+ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %}
+
+ ins_encode %{
+ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(fp_l2d);
+%}
+
+// ============================================================================
+// Compare instructions which set the result of a float comparison in the destination register.
+
+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2)
+%{
+ match(Set dst (CmpF3 op1 op2));
+
+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
+ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t"
+ "bgtz $dst, done\n\t"
+ "feq.s $dst, $op1, $op2\n\t"
+ "addi $dst, $dst, -1\t#@cmpF3_reg_reg"
+ %}
+
+ ins_encode %{
+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
+ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg),
+ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2)
+%{
+ match(Set dst (CmpD3 op1 op2));
+
+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
+ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t"
+ "bgtz $dst, done\n\t"
+ "feq.d $dst, $op1, $op2\n\t"
+ "addi $dst, $dst, -1\t#@cmpD3_reg_reg"
+ %}
+
+ ins_encode %{
+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
+ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2)
+%{
+ match(Set dst (CmpL3 op1 op2));
+
+ ins_cost(ALU_COST * 3 + BRANCH_COST);
+ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t"
+ "bnez $dst, done\n\t"
+ "slt $dst, $op1, $op2\n\t"
+ "neg $dst, $dst\t#@cmpL3_reg_reg"
+ %}
+ ins_encode %{
+ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg));
+ __ mv(as_Register($dst$$reg), t0);
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
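+// CmpLTMask produces an all-ones mask when p < q and zero otherwise:
+// slt yields 0 or 1, and subtracting it from zero turns 1 into -1 (0xffffffff).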
+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q)
+%{
+ match(Set dst (CmpLTMask p q));
+
+ ins_cost(2 * ALU_COST);
+
+ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t"
+ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg"
+ %}
+
+ ins_encode %{
+ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg));
+ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero)
+%{
+ match(Set dst (CmpLTMask op zero));
+
+ ins_cost(ALU_COST);
+
+ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %}
+
+ ins_encode %{
+ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+
+// ============================================================================
+// Max and Min
+
+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
+%{
+ match(Set dst (MinI src1 src2));
+
+ effect(DEF dst, USE src1, USE src2);
+
+ ins_cost(BRANCH_COST + ALU_COST * 2);
+ format %{
+ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t"
+ "mv $dst, $src2\n\t"
+ "j Ldone\n\t"
+ "bind Lsrc1\n\t"
+ "mv $dst, $src1\n\t"
+ "bind\t#@minI_rReg"
+ %}
+
+ ins_encode %{
+ Label Lsrc1, Ldone;
+ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
+ __ j(Ldone);
+ __ bind(Lsrc1);
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+ __ bind(Ldone);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
+%{
+ match(Set dst (MaxI src1 src2));
+
+ effect(DEF dst, USE src1, USE src2);
+
+ ins_cost(BRANCH_COST + ALU_COST * 2);
+ format %{
+ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t"
+ "mv $dst, $src2\n\t"
+ "j Ldone\n\t"
+ "bind Lsrc1\n\t"
+ "mv $dst, $src1\n\t"
+ "bind\t#@maxI_rReg"
+ %}
+
+ ins_encode %{
+ Label Lsrc1, Ldone;
+ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
+ __ j(Ldone);
+ __ bind(Lsrc1);
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+ __ bind(Ldone);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// ============================================================================
+// Branch Instructions
+// Direct Branch.
+instruct branch(label lbl)
+%{
+ match(Goto);
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "j $lbl\t#@branch" %}
+
+ ins_encode(riscv_enc_j(lbl));
+
+ ins_pipe(pipe_branch);
+%}
+
+// ============================================================================
+// Compare and Branch Instructions
+
+// Patterns for short (< 12KiB) variants
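+// Each pattern below is tagged ins_short_branch(1); when the branch target turns
+// out to be out of range, C2 falls back to the far_* variant that shares the same
+// match rule (see the "Same match rule as ..." comments).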
+
+// Compare flags and branch near instructions.
+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{
+ match(If cmp cr);
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label));
+ %}
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Compare signed int and branch near instructions
+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
+%{
+ // Same match rule as `far_cmpI_branch'.
+ match(If cmp (CmpI op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
+%{
+ // Same match rule as `far_cmpI_loop'.
+ match(CountedLoopEnd cmp (CmpI op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+// Compare unsigned int and branch near instructions
+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
+%{
+ // Same match rule as `far_cmpU_branch'.
+ match(If cmp (CmpU op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
+%{
+ // Same match rule as `far_cmpU_loop'.
+ match(CountedLoopEnd cmp (CmpU op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+// Compare signed long and branch near instructions
+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
+%{
+ // Same match rule as `far_cmpL_branch'.
+ match(If cmp (CmpL op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
+%{
+ // Same match rule as `far_cmpL_loop'.
+ match(CountedLoopEnd cmp (CmpL op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+// Compare unsigned long and branch near instructions
+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
+%{
+ // Same match rule as `far_cmpUL_branch'.
+ match(If cmp (CmpUL op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
+%{
+ // Same match rule as `far_cmpUL_loop'.
+ match(CountedLoopEnd cmp (CmpUL op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+// Compare pointer and branch near instructions
+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+ // Same match rule as `far_cmpP_branch'.
+ match(If cmp (CmpP op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+ // Same match rule as `far_cmpP_loop'.
+ match(CountedLoopEnd cmp (CmpP op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+// Compare narrow pointer and branch near instructions
+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+ // Same match rule as `far_cmpN_branch'.
+ match(If cmp (CmpN op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+ // Same match rule as `far_cmpN_loop'.
+ match(CountedLoopEnd cmp (CmpN op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+ ins_short_branch(1);
+%}
+
+// Compare float and branch near instructions
+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+ // Same match rule as `far_cmpF_branch'.
+ match(If cmp (CmpF op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST);
+ format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_class_compare);
+ ins_short_branch(1);
+%}
+
+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+ // Same match rule as `far_cmpF_loop'.
+ match(CountedLoopEnd cmp (CmpF op1 op2));
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST);
+ format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_class_compare);
+ ins_short_branch(1);
+%}
+
+// Compare double and branch near instructions
+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+ // Same match rule as `far_cmpD_branch'.
+ match(If cmp (CmpD op1 op2));
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST);
+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+ as_FloatRegister($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_class_compare);
+ ins_short_branch(1);
+%}
+
+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+ // Same match rule as `far_cmpD_loop'.
+ match(CountedLoopEnd cmp (CmpD op1 op2));
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST);
+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+ as_FloatRegister($op2$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_class_compare);
+ ins_short_branch(1);
+%}
+
+// Compare signed int with zero and branch near instructions
+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpI_reg_imm0_branch'.
+ match(If cmp (CmpI op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpI_reg_imm0_loop'.
+ match(CountedLoopEnd cmp (CmpI op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Compare unsigned int with zero and branch near instructions
+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'.
+ match(If cmp (CmpU op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'.
+ match(CountedLoopEnd cmp (CmpU op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Compare signed long with zero and branch near instructions
+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpL_reg_imm0_branch'.
+ match(If cmp (CmpL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpL_reg_imm0_loop'.
+ match(CountedLoopEnd cmp (CmpL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Compare unsigned long with zero and branch near instructions
+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'.
+ match(If cmp (CmpUL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'.
+ match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Compare pointer with zero and branch near instructions
+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+ // Same match rule as `far_cmpP_reg_imm0_branch'.
+ match(If cmp (CmpP op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+ // Same match rule as `far_cmpP_reg_imm0_loop'.
+ match(CountedLoopEnd cmp (CmpP op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Compare narrow pointer with zero and branch near instructions
+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+ // Same match rule as `far_cmpN_reg_imm0_branch'.
+ match(If cmp (CmpN op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+ // Same match rule as `far_cmpN_reg_imm0_loop'.
+ match(CountedLoopEnd cmp (CmpN op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Compare narrow pointer with pointer zero and branch near instructions
+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+ // Same match rule as `far_cmpP_narrowOop_imm0_branch'.
+ match(If cmp (CmpP (DecodeN op1) zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+ // Same match rule as `far_cmpP_narrowOop_imm0_loop'.
+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+ ins_short_branch(1);
+%}
+
+// Patterns for far (20KiB) variants
+
+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{
+ match(If cmp cr);
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST);
+ format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+// Compare signed int and branch far instructions
+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
+ match(If cmp (CmpI op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ // The format instruction [far_b$cmp] is expanded by the macro assembler into
+ // two instructions: b$not_cmp(op1, op2, done), j($lbl), bind(done)
+ // (see the sketch after this instruct).
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
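+
+// A hedged sketch (not generated code) of the far expansion described above,
+// e.g. for `far_bgt op1, op2, lbl':
+//
+//   ble op1, op2, done   // inverted near branch skips the jump
+//   j   lbl              // pc-relative jump with a larger reach
+//  done:
+//
+// which is why the far variants carry BRANCH_COST * 2.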
+
+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
+ match(CountedLoopEnd cmp (CmpI op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
+ match(If cmp (CmpU op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
+ match(CountedLoopEnd cmp (CmpU op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
+ match(If cmp (CmpL op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
+ match(CountedLoopEnd cmp (CmpL op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
+ match(If cmp (CmpUL op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
+ match(CountedLoopEnd cmp (CmpUL op1 op2));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+ match(If cmp (CmpP op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpP op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+ match(If cmp (CmpN op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpN op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmp_branch);
+%}
+
+// Float compare and branch instructions
+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+ match(If cmp (CmpF op1 op2));
+
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST * 2);
+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
+ *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_class_compare);
+%}
+
+instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpF op1 op2));
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST * 2);
+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
+ *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_class_compare);
+%}
+
+// Double compare and branch instructions
+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+ match(If cmp (CmpD op1 op2));
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST * 2);
+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_class_compare);
+%}
+
+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpD op1 op2));
+ effect(USE lbl);
+
+ ins_cost(XFER_COST + BRANCH_COST * 2);
+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%}
+
+ ins_encode %{
+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_class_compare);
+%}
+
+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ match(If cmp (CmpI op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpI op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ match(If cmp (CmpU op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpU op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+// Unsigned lt/ge compares against zero have no short instruct with the same
+// match rule: an unsigned value is never below zero, so the branch is either
+// never or always taken and the encoder emits at most an unconditional jump.
+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ match(If cmp (CmpU op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %}
+
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpU op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %}
+
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ match(If cmp (CmpL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ match(If cmp (CmpUL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+// Unsigned lt/ge compares against zero have no short instruct with the same
+// match rule: an unsigned value is never below zero, so the branch is either
+// never or always taken and the encoder emits at most an unconditional jump.
+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ match(If cmp (CmpUL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %}
+
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
+%{
+ match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+ effect(USE op1, USE lbl);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %}
+
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+ match(If cmp (CmpP op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+ match(CountedLoopEnd cmp (CmpP op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+ match(If cmp (CmpN op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+ match(CountedLoopEnd cmp (CmpN op1 zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+ match(If cmp (CmpP (DecodeN op1) zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));
+ effect(USE lbl);
+
+ ins_cost(BRANCH_COST * 2);
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %}
+
+ ins_encode %{
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+ %}
+
+ ins_pipe(pipe_cmpz_branch);
+%}
+
+// ============================================================================
+// Conditional Move Instructions
+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{
+ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src)));
+ ins_cost(ALU_COST + BRANCH_COST);
+
+ format %{
+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t"
+ "mv $dst, $src\n\t"
+ "skip:"
+ %}
+
+ ins_encode %{
+ __ enc_cmove($cop$$cmpcode,
+ as_Register($op1$$reg), as_Register($op2$$reg),
+ as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(pipe_slow);
+%}
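+
+// A hedged sketch of the branch-over-move expansion shared by the cmov rules
+// in this section (the base RV64 ISA has no integer conditional move), e.g.
+// for `CMoveI (Binary lt (CmpI a b)) (Binary dst src)':
+//
+//   bge a, b, skip       // negated condition jumps over the move
+//   mv  dst, src
+//  skip: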
+
+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{
+ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src)));
+ ins_cost(ALU_COST + BRANCH_COST);
+
+ format %{
+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t"
+ "mv $dst, $src\n\t"
+ "skip:"
+ %}
+
+ ins_encode %{
+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
+ as_Register($op1$$reg), as_Register($op2$$reg),
+ as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{
+ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src)));
+ ins_cost(ALU_COST + BRANCH_COST);
+
+ format %{
+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t"
+ "mv $dst, $src\n\t"
+ "skip:"
+ %}
+
+ ins_encode %{
+ __ enc_cmove($cop$$cmpcode,
+ as_Register($op1$$reg), as_Register($op2$$reg),
+ as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{
+ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src)));
+ ins_cost(ALU_COST + BRANCH_COST);
+
+ format %{
+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t"
+ "mv $dst, $src\n\t"
+ "skip:"
+ %}
+
+ ins_encode %{
+ __ enc_cmove($cop$$cmpcode,
+ as_Register($op1$$reg), as_Register($op2$$reg),
+ as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{
+ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src)));
+ ins_cost(ALU_COST + BRANCH_COST);
+
+ format %{
+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t"
+ "mv $dst, $src\n\t"
+ "skip:"
+ %}
+
+ ins_encode %{
+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
+ as_Register($op1$$reg), as_Register($op2$$reg),
+ as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{
+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src)));
+ ins_cost(ALU_COST + BRANCH_COST);
+ format %{
+ "bneg$cop $op1, $op2\t#@cmovI_cmpUL\n\t"
+ "mv $dst, $src\n\t"
+ "skip:"
+ %}
+
+ ins_encode %{
+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
+ as_Register($op1$$reg), as_Register($op2$$reg),
+ as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+
+// ============================================================================
+// Procedure Call/Return Instructions
+
+// Call Java Static Instruction
+// Note: If this code changes, the corresponding ret_addr_offset() and
+// compute_padding() functions will have to be adjusted.
+instruct CallStaticJavaDirect(method meth)
+%{
+ match(CallStaticJava);
+
+ effect(USE meth);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %}
+
+ ins_encode(riscv_enc_java_static_call(meth),
+ riscv_enc_call_epilog);
+
+ ins_pipe(pipe_class_call);
+ ins_alignment(4);
+%}
+
+// TO HERE
+
+// Call Java Dynamic Instruction
+// Note: If this code changes, the corresponding ret_addr_offset() and
+// compute_padding() functions will have to be adjusted.
+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr)
+%{
+ match(CallDynamicJava);
+
+ effect(USE meth, KILL cr);
+
+ ins_cost(BRANCH_COST + ALU_COST * 6);
+
+ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %}
+
+ ins_encode(riscv_enc_java_dynamic_call(meth),
+ riscv_enc_call_epilog);
+
+ ins_pipe(pipe_class_call);
+ ins_alignment(4);
+%}
+
+// Call Runtime Instruction
+
+instruct CallRuntimeDirect(method meth, rFlagsReg cr)
+%{
+ match(CallRuntime);
+
+ effect(USE meth, KILL cr);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %}
+
+ ins_encode(riscv_enc_java_to_runtime(meth));
+
+ ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallLeafDirect(method meth, rFlagsReg cr)
+%{
+ match(CallLeaf);
+
+ effect(USE meth, KILL cr);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %}
+
+ ins_encode(riscv_enc_java_to_runtime(meth));
+
+ ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
+%{
+ match(CallLeafNoFP);
+
+ effect(USE meth, KILL cr);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %}
+
+ ins_encode(riscv_enc_java_to_runtime(meth));
+
+ ins_pipe(pipe_class_call);
+%}
+
+// ============================================================================
+// Partial Subtype Check
+//
+// Search the subklass's secondary supers (superklass) array for an instance
+// of the superklass. Set a hidden internal cache on a hit (the cache is
+// checked with exposed code in gen_subtype_check()). Return zero for a hit.
+// The encoding ALSO sets flags.
+
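+// A hedged note: this node is generated for subtype checks that cannot be
+// decided from the primary supers display alone; partialSubtypeCheckVsZero
+// below additionally matches the common `(PartialSubtypeCheck sub super) == NULL'
+// shape so the result feeds the flags register directly.
+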
+instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr)
+%{
+ match(Set result (PartialSubtypeCheck sub super));
+ effect(KILL tmp, KILL cr);
+
+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
+ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %}
+
+ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result));
+
+ opcode(0x1); // Force zero of result reg on hit
+
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp,
+ immP0 zero, rFlagsReg cr)
+%{
+ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
+ effect(KILL tmp, KILL result);
+
+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
+ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %}
+
+ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result));
+
+ opcode(0x0); // Don't zero result reg on hit
+
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
+ ins_encode %{
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_compare($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ StrIntrinsicNode::UU);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
+ ins_encode %{
+ __ string_compare($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ StrIntrinsicNode::LL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
+ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
+ ins_encode %{
+ __ string_compare($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ StrIntrinsicNode::UL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3,
+ rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
+ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
+ ins_encode %{
+ __ string_compare($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ StrIntrinsicNode::LU);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+%{
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
+
+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
+ ins_encode %{
+ __ string_indexof($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register,
+ $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register,
+ $tmp5$$Register, $tmp6$$Register,
+ $result$$Register, StrIntrinsicNode::UU);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+%{
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
+
+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
+ ins_encode %{
+ __ string_indexof($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register,
+ $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register,
+ $tmp5$$Register, $tmp6$$Register,
+ $result$$Register, StrIntrinsicNode::LL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+%{
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
+
+ ins_encode %{
+ __ string_indexof($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register,
+ $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register,
+ $tmp5$$Register, $tmp6$$Register,
+ $result$$Register, StrIntrinsicNode::UL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+
+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
+
+ ins_encode %{
+ int icnt2 = (int)$int_cnt2$$constant;
+ __ string_indexof_linearscan($str1$$Register, $str2$$Register,
+ $cnt1$$Register, zr,
+ $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register,
+ icnt2, $result$$Register, StrIntrinsicNode::UU);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+
+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
+ ins_encode %{
+ int icnt2 = (int)$int_cnt2$$constant;
+ __ string_indexof_linearscan($str1$$Register, $str2$$Register,
+ $cnt1$$Register, zr,
+ $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register,
+ icnt2, $result$$Register, StrIntrinsicNode::LL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
+ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+
+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
+ ins_encode %{
+ int icnt2 = (int)$int_cnt2$$constant;
+ __ string_indexof_linearscan($str1$$Register, $str2$$Register,
+ $cnt1$$Register, zr,
+ $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register,
+ icnt2, $result$$Register, StrIntrinsicNode::UL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
+ ins_encode %{
+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register, false /* isL */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+
+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
+ ins_encode %{
+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register, $tmp4$$Register, true /* isL */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// clearing of an array
+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy)
+%{
+ predicate(!UseRVV);
+ match(Set dummy (ClearArray cnt base));
+ effect(USE_KILL cnt, USE_KILL base);
+
+ ins_cost(4 * DEFAULT_COST);
+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
+
+ ins_encode %{
+ address tpc = __ zero_words($base$$Register, $cnt$$Register);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ %}
+
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr)
+%{
+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long()
+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
+ match(Set dummy (ClearArray cnt base));
+ effect(USE_KILL base, KILL cr);
+
+ ins_cost(4 * DEFAULT_COST);
+ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %}
+
+ ins_encode %{
+ __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
+ %}
+
+ ins_pipe(pipe_class_memory);
+%}
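+
+// A hedged note on the two ClearArray rules above: the immediate form is only
+// selected when the constant length is below BlockZeroingLowLimit (scaled to
+// words), where a short inline store sequence is preferable; larger or
+// variable lengths go through the register form and zero_words(), which may
+// emit a stub call (hence the CodeCache-full check).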
+
+instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
+ iRegI_R10 result, rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
+
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
+ ins_encode %{
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_equals($str1$$Register, $str2$$Register,
+ $result$$Register, $cnt$$Register, 1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
+ iRegI_R10 result, rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
+
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}
+ ins_encode %{
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_equals($str1$$Register, $str2$$Register,
+ $result$$Register, $cnt$$Register, 2);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
+ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (AryEq ary1 ary2));
+ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);
+
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %}
+ ins_encode %{
+ __ arrays_equals($ary1$$Register, $ary2$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
+ $result$$Register, $tmp5$$Register, 1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
+ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
+%{
+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (AryEq ary1 ary2));
+ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);
+
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %}
+ ins_encode %{
+ __ arrays_equals($ary1$$Register, $ary2$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
+ $result$$Register, $tmp5$$Register, 2);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// ============================================================================
+// Safepoint Instructions
+
+instruct safePoint(iRegP poll)
+%{
+ match(SafePoint poll);
+
+ ins_cost(2 * LOAD_COST);
+ format %{
+ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint"
+ %}
+ ins_encode %{
+ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type);
+ %}
+ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
+%}
+
+// ============================================================================
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
+instruct tlsLoadP(javaThread_RegP dst)
+%{
+ match(Set dst (ThreadLocal));
+
+ ins_cost(0);
+
+ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %}
+
+ size(0);
+
+ ins_encode( /*empty*/ );
+
+ ins_pipe(pipe_class_empty);
+%}
+
+// inlined locking and unlocking
+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers
+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2)
+%{
+ match(Set cr (FastLock object box));
+ effect(TEMP tmp1, TEMP tmp2);
+
+ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3);
+ format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %}
+
+ ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2));
+
+ ins_pipe(pipe_serial);
+%}
+
+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers
+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2)
+%{
+ match(Set cr (FastUnlock object box));
+ effect(TEMP tmp1, TEMP tmp2);
+
+ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4);
+ format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %}
+
+ ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2));
+
+ ins_pipe(pipe_serial);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
+%{
+ match(TailCall jump_target method_oop);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %}
+
+ ins_encode(riscv_enc_tail_call(jump_target));
+
+ ins_pipe(pipe_class_call);
+%}
+
+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop)
+%{
+ match(TailJump jump_target ex_oop);
+
+ ins_cost(ALU_COST + BRANCH_COST);
+
+ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %}
+
+ ins_encode(riscv_enc_tail_jmp(jump_target));
+
+ ins_pipe(pipe_class_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+instruct CreateException(iRegP_R10 ex_oop)
+%{
+ match(Set ex_oop (CreateEx));
+
+ ins_cost(0);
+ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %}
+
+ size(0);
+
+ ins_encode( /*empty*/ );
+
+ ins_pipe(pipe_class_empty);
+%}
+
+// Rethrow exception: The exception oop will come in the first
+// argument position. Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException()
+%{
+ match(Rethrow);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "j rethrow_stub\t#@RethrowException" %}
+
+ ins_encode(riscv_enc_rethrow());
+
+ ins_pipe(pipe_class_call);
+%}
+
+// Return Instruction
+// epilog node loads ret address into ra as part of frame pop
+instruct Ret()
+%{
+ match(Return);
+
+ ins_cost(BRANCH_COST);
+ format %{ "ret\t// return register, #@Ret" %}
+
+ ins_encode(riscv_enc_ret());
+
+ ins_pipe(pipe_branch);
+%}
+
+// Die now.
+instruct ShouldNotReachHere() %{
+ match(Halt);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "#@ShouldNotReachHere" %}
+
+ ins_encode %{
+ Assembler::CompressibleRegion cr(&_masm);
+ if (is_reachable()) {
+ __ halt();
+ }
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+// [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser. An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == RAX_enc)
+// Only one replacement instruction
+//
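+// ---------EXAMPLE (hypothetical)---------------------------------------------
+//
+// A hedged, commented-out sketch of the syntax only. The instruct names used
+// here (mvI_reg, addI_reg_reg) are placeholders; this port currently defines
+// no peephole rules.
+//
+// peephole %{
+//   peepmatch ( mvI_reg addI_reg_reg );
+//   peepconstraint ( 0.src == 1.dst );
+//   peepreplace ( addI_reg_reg( 0.dst 1.src1 1.src2 ) );
+// %}
+//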
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad
new file mode 100644
index 0000000000000000000000000000000000000000..bbebe13f0a8385a2b25da26b086becb1086d51c7
--- /dev/null
+++ b/src/hotspot/cpu/riscv/riscv_b.ad
@@ -0,0 +1,527 @@
+//
+// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// RISCV Bit-Manipulation Extension Architecture Description File
+
+instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI shift) %{
+ predicate(UseZbb);
+ match(Set dst (RotateRight src shift));
+
+ format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_b" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI shift) %{
+ predicate(UseZbb);
+ match(Set dst (RotateRight src shift));
+
+ format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_b" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f);
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{
+ predicate(UseZbb);
+ match(Set dst (RotateRight src shift));
+
+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %}
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{
+ predicate(UseZbb);
+ match(Set dst (RotateRight src shift));
+
+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %}
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{
+ predicate(UseZbb);
+ match(Set dst (RotateLeft src shift));
+
+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %}
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{
+ predicate(UseZbb);
+ match(Set dst (RotateLeft src shift));
+
+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %}
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Convert oop into int for vectors alignment masking
+instruct convP2I_b(iRegINoSp dst, iRegP src) %{
+ predicate(UseZba);
+ match(Set dst (ConvL2I (CastP2X src)));
+
+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// byte to int
+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{
+ predicate(UseZbb);
+ match(Set dst (RShiftI (LShiftI src lshift) rshift));
+
+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
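+
+// A hedged note: `(x << 24) >> 24' is the canonical byte sign-extension idiom
+// C2 produces for i2b, so the (RShiftI (LShiftI src 24) 24) shape above folds
+// into a single `sext.b'. The i2s rule below does the same with 16-bit shifts.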
+
+// int to short
+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{
+ predicate(UseZbb);
+ match(Set dst (RShiftI (LShiftI src lshift) rshift));
+
+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// short to unsigned int
+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{
+ predicate(UseZbb);
+ match(Set dst (AndI src mask));
+
+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// int to unsigned long (zero extend)
+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{
+ predicate(UseZba);
+ match(Set dst (AndL (ConvI2L src) mask));
+
+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %}
+
+ ins_cost(ALU_COST);
+ ins_encode %{
+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg_shift);
+%}
+
+// BSWAP instructions
+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{
+ predicate(UseZbb);
+ match(Set dst (ReverseBytesI src));
+
+ ins_cost(ALU_COST * 2);
+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %}
+
+ ins_encode %{
+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{
+ predicate(UseZbb);
+ match(Set dst (ReverseBytesL src));
+
+ ins_cost(ALU_COST);
+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %}
+
+ ins_encode %{
+ __ rev8(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{
+ predicate(UseZbb);
+ match(Set dst (ReverseBytesUS src));
+
+ ins_cost(ALU_COST * 2);
+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %}
+
+ ins_encode %{
+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{
+ predicate(UseZbb);
+ match(Set dst (ReverseBytesS src));
+
+ ins_cost(ALU_COST * 2);
+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %}
+
+ ins_encode %{
+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Shift Add Pointer
+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{
+ predicate(UseZba);
+ match(Set dst (AddP src1 (LShiftL src2 imm)));
+
+ ins_cost(ALU_COST);
+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %}
+
+ ins_encode %{
+ __ shadd(as_Register($dst$$reg),
+ as_Register($src2$$reg),
+ as_Register($src1$$reg),
+ t0,
+ $imm$$constant);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{
+ predicate(UseZba);
+ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm)));
+
+ ins_cost(ALU_COST);
+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %}
+
+ ins_encode %{
+ __ shadd(as_Register($dst$$reg),
+ as_Register($src2$$reg),
+ as_Register($src1$$reg),
+ t0,
+ $imm$$constant);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
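+
+// A hedged example for the two rules above: the scaled part of a Java long[]
+// element address, ptr + ((long) i << 3), reaches the matcher as
+// (AddP src1 (LShiftL (ConvI2L i) 3)) and collapses into a single Zba
+// scaled-add instead of a separate shift and add.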
+
+// Shift Add Long
+instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{
+ predicate(UseZba);
+ match(Set dst (AddL src1 (LShiftL src2 imm)));
+
+ ins_cost(ALU_COST);
+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %}
+
+ ins_encode %{
+ __ shadd(as_Register($dst$$reg),
+ as_Register($src2$$reg),
+ as_Register($src1$$reg),
+ t0,
+ $imm$$constant);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{
+ predicate(UseZba);
+ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm)));
+
+ ins_cost(ALU_COST);
+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %}
+
+ ins_encode %{
+ __ shadd(as_Register($dst$$reg),
+ as_Register($src2$$reg),
+ as_Register($src1$$reg),
+ t0,
+ $imm$$constant);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Zeros Count instructions
+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{
+ predicate(UseZbb);
+ match(Set dst (CountLeadingZerosI src));
+
+ ins_cost(ALU_COST);
+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %}
+
+ ins_encode %{
+ __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{
+ predicate(UseZbb);
+ match(Set dst (CountLeadingZerosL src));
+
+ ins_cost(ALU_COST);
+ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %}
+
+ ins_encode %{
+ __ clz(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{
+ predicate(UseZbb);
+ match(Set dst (CountTrailingZerosI src));
+
+ ins_cost(ALU_COST);
+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %}
+
+ ins_encode %{
+ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{
+ predicate(UseZbb);
+ match(Set dst (CountTrailingZerosL src));
+
+ ins_cost(ALU_COST);
+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %}
+
+ ins_encode %{
+ __ ctz(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Population Count instructions
+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountI src));
+
+ ins_cost(ALU_COST);
+ format %{ "cpopw $dst, $src\t#@popCountI_b" %}
+
+ ins_encode %{
+ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Note: Long/bitCount(long) returns an int.
+instruct popCountL_b(iRegINoSp dst, iRegL src) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountL src));
+
+ ins_cost(ALU_COST);
+ format %{ "cpop $dst, $src\t#@popCountL_b" %}
+
+ ins_encode %{
+ __ cpop(as_Register($dst$$reg), as_Register($src$$reg));
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Max and Min
+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{
+ predicate(UseZbb);
+ match(Set dst (MinI src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %}
+
+ ins_encode %{
+ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{
+ predicate(UseZbb);
+ match(Set dst (MaxI src1 src2));
+
+ ins_cost(ALU_COST);
+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %}
+
+ ins_encode %{
+ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Abs
+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{
+ predicate(UseZbb);
+ match(Set dst (AbsI src));
+
+ ins_cost(ALU_COST * 2);
+ format %{
+ "negw t0, $src\n\t"
+ "max $dst, $src, t0\t#@absI_reg_b"
+ %}
+
+ ins_encode %{
+ __ negw(t0, as_Register($src$$reg));
+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{
+ predicate(UseZbb);
+ match(Set dst (AbsL src));
+
+ ins_cost(ALU_COST * 2);
+ format %{
+ "neg t0, $src\n\t"
+ "max $dst, $src, t0\t#@absL_reg_b"
+ %}
+
+ ins_encode %{
+ __ neg(t0, as_Register($src$$reg));
+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// And Not
+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{
+ predicate(UseZbb);
+ match(Set dst (AndI src1 (XorI src2 m1)));
+
+ ins_cost(ALU_COST);
+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %}
+
+ ins_encode %{
+ __ andn(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{
+ predicate(UseZbb);
+ match(Set dst (AndL src1 (XorL src2 m1)));
+
+ ins_cost(ALU_COST);
+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %}
+
+ ins_encode %{
+ __ andn(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Or Not
+instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{
+ predicate(UseZbb);
+ match(Set dst (OrI src1 (XorI src2 m1)));
+
+ ins_cost(ALU_COST);
+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %}
+
+ ins_encode %{
+ __ orn(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{
+ predicate(UseZbb);
+ match(Set dst (OrL src1 (XorL src2 m1)));
+
+ ins_cost(ALU_COST);
+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %}
+
+ ins_encode %{
+ __ orn(as_Register($dst$$reg),
+ as_Register($src1$$reg),
+ as_Register($src2$$reg));
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
\ No newline at end of file
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
new file mode 100644
index 0000000000000000000000000000000000000000..3828e096b21b5f00bd4ec68234756cb2bd5aced4
--- /dev/null
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -0,0 +1,2065 @@
+//
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// RISCV Vector Extension Architecture Description File
+
+opclass vmemA(indirect);
+
+source_hpp %{
+ bool op_vec_supported(int opcode);
+%}
+
+source %{
+
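+  // Shared helper for loadV/storeV below. vsetvli with rs1 = x0 and a non-x0
+  // rd is expected to set vl to VLMAX for the requested SEW, so a single
+  // vector load/store covers the whole register group.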
+ static void loadStore(C2_MacroAssembler masm, bool is_store,
+ VectorRegister reg, BasicType bt, Register base) {
+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
+ masm.vsetvli(t0, x0, sew);
+ if (is_store) {
+ masm.vsex_v(reg, base, sew);
+ } else {
+ masm.vlex_v(reg, base, sew);
+ }
+ }
+
+ bool op_vec_supported(int opcode) {
+ switch (opcode) {
+ // No multiply reduction instructions
+ case Op_MulReductionVD:
+ case Op_MulReductionVF:
+ case Op_MulReductionVI:
+ case Op_MulReductionVL:
+ // Others
+ case Op_Extract:
+ case Op_ExtractB:
+ case Op_ExtractC:
+ case Op_ExtractD:
+ case Op_ExtractF:
+ case Op_ExtractI:
+ case Op_ExtractL:
+ case Op_ExtractS:
+ case Op_ExtractUB:
+ // Vector API specific
+ case Op_AndReductionV:
+ case Op_OrReductionV:
+ case Op_XorReductionV:
+ case Op_LoadVectorGather:
+ case Op_StoreVectorScatter:
+ case Op_VectorBlend:
+ case Op_VectorCast:
+ case Op_VectorCastB2X:
+ case Op_VectorCastD2X:
+ case Op_VectorCastF2X:
+ case Op_VectorCastI2X:
+ case Op_VectorCastL2X:
+ case Op_VectorCastS2X:
+ case Op_VectorInsert:
+ case Op_VectorLoadConst:
+ case Op_VectorLoadMask:
+ case Op_VectorLoadShuffle:
+ case Op_VectorMaskCmp:
+ case Op_VectorRearrange:
+ case Op_VectorReinterpret:
+ case Op_VectorStoreMask:
+ case Op_VectorTest:
+ return false;
+ default:
+ return UseRVV;
+ }
+ }
+
+%}
+
+definitions %{
+ int_def VEC_COST (200, 200);
+%}
+
+// All VEC instructions
+
+// vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+ match(Set dst (LoadVector mem));
+ ins_cost(VEC_COST);
+ format %{ "vle $dst, $mem\t#@loadV" %}
+ ins_encode %{
+ VectorRegister dst_reg = as_VectorRegister($dst$$reg);
+ loadStore(C2_MacroAssembler(&cbuf), false, dst_reg,
+ Matcher::vector_element_basic_type(this), as_Register($mem$$base));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+ match(Set mem (StoreVector mem src));
+ ins_cost(VEC_COST);
+ format %{ "vse $src, $mem\t#@storeV" %}
+ ins_encode %{
+ VectorRegister src_reg = as_VectorRegister($src$$reg);
+ loadStore(C2_MacroAssembler(&cbuf), true, src_reg,
+ Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector abs
+
+instruct vabsB(vReg dst, vReg src, vReg tmp) %{
+ match(Set dst (AbsVB src));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t"
+ "vmax.vv $dst, $tmp, $src" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg), 0);
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vabsS(vReg dst, vReg src, vReg tmp) %{
+ match(Set dst (AbsVS src));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t"
+ "vmax.vv $dst, $tmp, $src" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg), 0);
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vabsI(vReg dst, vReg src, vReg tmp) %{
+ match(Set dst (AbsVI src));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t"
+ "vmax.vv $dst, $tmp, $src" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg), 0);
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vabsL(vReg dst, vReg src, vReg tmp) %{
+ match(Set dst (AbsVL src));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t"
+ "vmax.vv $dst, $tmp, $src" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg), 0);
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vabsF(vReg dst, vReg src) %{
+ match(Set dst (AbsVF src));
+ ins_cost(VEC_COST);
+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vabsD(vReg dst, vReg src) %{
+ match(Set dst (AbsVD src));
+ ins_cost(VEC_COST);
+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector add
+
+instruct vaddB(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (AddVB src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vadd_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddS(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (AddVS src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vadd_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddI(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (AddVI src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vadd_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddL(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (AddVL src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vadd_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddF(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (AddVF src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfadd_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddD(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (AddVD src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfadd_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector and
+
+instruct vand(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (AndV src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vand_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector or
+
+instruct vor(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (OrV src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vor_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector xor
+
+instruct vxor(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (XorV src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vxor_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector float div
+
+instruct vdivF(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (DivVF src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfdiv_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivD(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (DivVD src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfdiv_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector integer max/min
+
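+// The integer forms below exclude T_FLOAT/T_DOUBLE; those element types are
+// handled by the minmax_FD_v patterns that follow, which are assumed to
+// implement Java's NaN and signed-zero ordering rather than a bare
+// vfmax/vfmin.
+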
+instruct vmax(vReg dst, vReg src1, vReg src2) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT &&
+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
+ __ vsetvli(t0, x0, sew);
+ __ vmax_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmin(vReg dst, vReg src1, vReg src2) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT &&
+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE);
+ match(Set dst (MinV src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
+ __ vsetvli(t0, x0, sew);
+ __ vmin_vv(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector floating-point max/min
+
+instruct vmaxF(vReg dst, vReg src1, vReg src2) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (MaxV src1 src2));
+ effect(TEMP_DEF dst);
+ ins_cost(VEC_COST);
+ format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %}
+ ins_encode %{
+ __ minmax_FD_v(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
+ false /* is_double */, false /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmaxD(vReg dst, vReg src1, vReg src2) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (MaxV src1 src2));
+ effect(TEMP_DEF dst);
+ ins_cost(VEC_COST);
+ format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %}
+ ins_encode %{
+ __ minmax_FD_v(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
+ true /* is_double */, false /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vminF(vReg dst, vReg src1, vReg src2) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (MinV src1 src2));
+ effect(TEMP_DEF dst);
+ ins_cost(VEC_COST);
+ format %{ "vminF $dst, $src1, $src2\t#@vminF" %}
+ ins_encode %{
+ __ minmax_FD_v(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
+ false /* is_double */, true /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vminD(vReg dst, vReg src1, vReg src2) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (MinV src1 src2));
+ effect(TEMP_DEF dst);
+ ins_cost(VEC_COST);
+ format %{ "vminD $dst, $src1, $src2\t#@vminD" %}
+ ins_encode %{
+ __ minmax_FD_v(as_VectorRegister($dst$$reg),
+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
+ true /* is_double */, true /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fmla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fmls
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
+ ins_cost(VEC_COST);
+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
+ ins_cost(VEC_COST);
+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fnmla
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
+ ins_cost(VEC_COST);
+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
+ ins_cost(VEC_COST);
+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fnmls
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA);
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector mla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector mls
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{
+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
+ ins_cost(VEC_COST);
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector mul
+
+instruct vmulB(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (MulVB src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulS(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (MulVS src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulI(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (MulVI src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulL(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (MulVL src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulF(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (MulVF src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulD(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (MulVD src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fneg
+
+instruct vnegF(vReg dst, vReg src) %{
+ match(Set dst (NegVF src));
+ ins_cost(VEC_COST);
+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vnegD(vReg dst, vReg src) %{
+ match(Set dst (NegVD src));
+ ins_cost(VEC_COST);
+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// popcount vector
+
+instruct vpopcountI(iRegINoSp dst, vReg src) %{
+ match(Set dst (PopCountVI src));
+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector add reduction
+
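+// Integer add reductions seed element 0 of $tmp with the scalar input via
+// vmv.s.x, accumulate with vredsum.vs and read the result back with vmv.x.s.
+// The FP variants use vfredosum.vs (the ordered sum), presumably to preserve
+// Java's strict left-to-right floating-point addition order.
+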
+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp);
+ ins_cost(VEC_COST);
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t"
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
+ "vmv.x.s $dst, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp);
+ ins_cost(VEC_COST);
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t"
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
+ "vmv.x.s $dst, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp);
+ ins_cost(VEC_COST);
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t"
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
+ "vmv.x.s $dst, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (AddReductionVL src1 src2));
+ effect(TEMP tmp);
+ ins_cost(VEC_COST);
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t"
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
+ "vmv.x.s $dst, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{
+ match(Set src1_dst (AddReductionVF src1_dst src2));
+ effect(TEMP tmp);
+ ins_cost(VEC_COST);
+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t"
+ "vfredosum.vs $tmp, $src2, $tmp\n\t"
+ "vfmv.f.s $src1_dst, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp$$reg));
+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{
+ match(Set src1_dst (AddReductionVD src1_dst src2));
+ effect(TEMP tmp);
+ ins_cost(VEC_COST);
+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t"
+ "vfredosum.vs $tmp, $src2, $tmp\n\t"
+ "vfmv.f.s $src1_dst, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp$$reg));
+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector integer max reduction
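+// For byte/short the scalar operand is folded in after the vector reduction
+// with a compare-and-branch, while the int/long forms seed the reduction by
+// inserting it with vmv.s.x. The min reductions below follow the same scheme.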
+instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ Label Ldone;
+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+ __ bind(Ldone);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ Label Ldone;
+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+ __ bind(Ldone);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector integer min reduction
+instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ Label Ldone;
+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+ __ bind(Ldone);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ Label Ldone;
+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+ __ bind(Ldone);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP tmp);
+ format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector float max reduction
+
+instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %}
+ ins_encode %{
+ __ reduce_minmax_FD_v($dst$$FloatRegister,
+ $src1$$FloatRegister, as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
+ false /* is_double */, false /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (MaxReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %}
+ ins_encode %{
+ __ reduce_minmax_FD_v($dst$$FloatRegister,
+ $src1$$FloatRegister, as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
+ true /* is_double */, false /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector float min reduction
+
+instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %}
+ ins_encode %{
+ __ reduce_minmax_FD_v($dst$$FloatRegister,
+ $src1$$FloatRegister, as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
+ false /* is_double */, true /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (MinReductionV src1 src2));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %}
+ ins_encode %{
+ __ reduce_minmax_FD_v($dst$$FloatRegister,
+ $src1$$FloatRegister, as_VectorRegister($src2$$reg),
+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
+ true /* is_double */, true /* is_min */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector Math.rint, floor, ceil
+
+instruct vroundD(vReg dst, vReg src, immI rmode) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (RoundDoubleModeV src rmode));
+ format %{ "vroundD $dst, $src, $rmode" %}
+ ins_encode %{
+ switch ($rmode$$constant) {
+ case RoundDoubleModeNode::rmode_rint:
+ __ csrwi(CSR_FRM, C2_MacroAssembler::rne);
+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
+ break;
+ case RoundDoubleModeNode::rmode_floor:
+ __ csrwi(CSR_FRM, C2_MacroAssembler::rdn);
+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
+ break;
+ case RoundDoubleModeNode::rmode_ceil:
+ __ csrwi(CSR_FRM, C2_MacroAssembler::rup);
+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector replicate
+
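+// The *_imm5 forms below use vmv.v.i, whose immediate is a 5-bit signed
+// field; the immI5/immL5 operands (defined in riscv.ad) are assumed to
+// restrict the constant to that range.
+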
+instruct replicateB(vReg dst, iRegIorL2I src) %{
+ match(Set dst (ReplicateB src));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateS(vReg dst, iRegIorL2I src) %{
+ match(Set dst (ReplicateS src));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateI(vReg dst, iRegIorL2I src) %{
+ match(Set dst (ReplicateI src));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateL(vReg dst, iRegL src) %{
+ match(Set dst (ReplicateL src));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateB_imm5(vReg dst, immI5 con) %{
+ match(Set dst (ReplicateB con));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateS_imm5(vReg dst, immI5 con) %{
+ match(Set dst (ReplicateS con));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateI_imm5(vReg dst, immI5 con) %{
+ match(Set dst (ReplicateI con));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateL_imm5(vReg dst, immL5 con) %{
+ match(Set dst (ReplicateL con));
+ ins_cost(VEC_COST);
+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateF(vReg dst, fRegF src) %{
+ match(Set dst (ReplicateF src));
+ ins_cost(VEC_COST);
+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateD(vReg dst, fRegD src) %{
+ match(Set dst (ReplicateD src));
+ ins_cost(VEC_COST);
+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector shift
+
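+// Byte/short variable shifts may see counts >= the element width. The
+// patterns below mark those lanes with vmsgtu.vi into v0, handle them under
+// the mask (saturating to element_bits - 1 for arithmetic right shifts,
+// zeroing the lane for logical and left shifts), and then perform the real
+// shift under the complemented mask.
+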
+instruct vasrB(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (RShiftVB src shift));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst);
+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t"
+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t"
+ "vmnot.m v0, v0\n\t"
+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ // if shift > BitsPerByte - 1, use an arithmetic shift by BitsPerByte - 1 instead (lane becomes 0 or -1)
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ BitsPerByte - 1, Assembler::v0_t);
+ // otherwise, shift
+ __ vmnot_m(v0, v0);
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrS(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (RShiftVS src shift));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst);
+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t"
+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t"
+ "vmnot.m v0, v0\n\t"
+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ // if shift > BitsPerShort - 1, use an arithmetic shift by BitsPerShort - 1 instead (lane becomes 0 or -1)
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ BitsPerShort - 1, Assembler::v0_t);
+ // otherwise, shift
+ __ vmnot_m(v0, v0);
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrI(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (RShiftVI src shift));
+ ins_cost(VEC_COST);
+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrL(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (RShiftVL src shift));
+ ins_cost(VEC_COST);
+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslB(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (LShiftVB src shift));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst);
+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t"
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
+ "vmnot.m v0, v0\n\t"
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ // if shift > BitsPerByte - 1, clear the element
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg), Assembler::v0_t);
+ // otherwise, shift
+ __ vmnot_m(v0, v0);
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslS(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (LShiftVS src shift));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst);
+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t"
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
+ "vmnot.m v0, v0\n\t"
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ // if shift > BitsPerShort - 1, clear the element
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg), Assembler::v0_t);
+ // otherwise, shift
+ __ vmnot_m(v0, v0);
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslI(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (LShiftVI src shift));
+ ins_cost(VEC_COST);
+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslL(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (LShiftVL src shift));
+ ins_cost(VEC_COST);
+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (URShiftVB src shift));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst);
+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t"
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
+ "vmnot.m v0, v0, v0\n\t"
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ // if shift > BitsPerByte - 1, clear the element
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg), Assembler::v0_t);
+ // otherwise, shift
+ __ vmnot_m(v0, v0);
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (URShiftVS src shift));
+ ins_cost(VEC_COST);
+ effect(TEMP_DEF dst);
+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t"
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
+ "vmnot.m v0, v0\n\t"
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ // if shift > BitsPerShort - 1, clear the element
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg), Assembler::v0_t);
+ // otherwise, shift
+ __ vmnot_m(v0, v0);
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (URShiftVI src shift));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL(vReg dst, vReg src, vReg shift) %{
+ match(Set dst (URShiftVL src shift));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
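+// Immediate shifts: vsra.vi/vsrl.vi/vsll.vi encode only a 5-bit unsigned
+// shift amount, hence the "& 0x1f" masking below and the predicates on the
+// long forms that accept constants below 32 only. A count of 0 degenerates
+// into a register copy (vor.vv of $src with itself).
+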
+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (RShiftVB src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e8);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ if (con >= BitsPerByte) con = BitsPerByte - 1;
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (RShiftVS src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e16);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ if (con >= BitsPerShort) con = BitsPerShort - 1;
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (RShiftVI src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e32);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
+ match(Set dst (RShiftVL src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e64);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (URShiftVB src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e8);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ if (con >= BitsPerByte) {
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (URShiftVS src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e16);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ if (con >= BitsPerShort) {
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (URShiftVI src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e32);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
+ match(Set dst (URShiftVL src (RShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e64);
+ if (con == 0) {
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (LShiftVB src (LShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e8);
+ if (con >= BitsPerByte) {
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (LShiftVS src (LShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e16);
+ if (con >= BitsPerShort) {
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+ as_VectorRegister($src$$reg));
+ return;
+ }
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
+ match(Set dst (LShiftVI src (LShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
+ match(Set dst (LShiftVL src (LShiftCntV shift)));
+ ins_cost(VEC_COST);
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %}
+ ins_encode %{
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR);
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
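+// Note: the vshiftcnt* instructs above materialize a variable shift amount for the
+// vector-vector shift forms by broadcasting the scalar count into every lane with
+// vmv.v.x at the matching element width.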
+
+// vector sqrt
+
+instruct vsqrtF(vReg dst, vReg src) %{
+ match(Set dst (SqrtVF src));
+ ins_cost(VEC_COST);
+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsqrtD(vReg dst, vReg src) %{
+ match(Set dst (SqrtVD src));
+ ins_cost(VEC_COST);
+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector sub
+
+instruct vsubB(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (SubVB src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e8);
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubS(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (SubVS src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e16);
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubI(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (SubVI src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubL(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (SubVL src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubF(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (SubVF src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e32);
+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubD(vReg dst, vReg src1, vReg src2) %{
+ match(Set dst (SubVD src1 src2));
+ ins_cost(VEC_COST);
+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %}
+ ins_encode %{
+ __ vsetvli(t0, x0, Assembler::e64);
+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
+ as_VectorRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
+ iRegI_R10 result, vReg_V1 v1,
+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr)
+%{
+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
+ ins_encode %{
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_equals_v($str1$$Register, $str2$$Register,
+ $result$$Register, $cnt$$Register, 1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
+ iRegI_R10 result, vReg_V1 v1,
+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr)
+%{
+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (StrEquals (Binary str1 str2) cnt));
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}
+ ins_encode %{
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_equals_v($str1$$Register, $str2$$Register,
+ $result$$Register, $cnt$$Register, 2);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr)
+%{
+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (AryEq ary1 ary2));
+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %}
+ ins_encode %{
+ __ arrays_equals_v($ary1$$Register, $ary2$$Register,
+ $result$$Register, $tmp$$Register, 1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr)
+%{
+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (AryEq ary1 ary2));
+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %}
+ ins_encode %{
+ __ arrays_equals_v($ary1$$Register, $ary2$$Register,
+ $result$$Register, $tmp$$Register, 2);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
+%{
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
+ ins_encode %{
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_compare_v($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register,
+ StrIntrinsicNode::UU);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
+%{
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
+ ins_encode %{
+ __ string_compare_v($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register,
+ StrIntrinsicNode::LL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
+%{
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+
+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
+ ins_encode %{
+ __ string_compare_v($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register,
+ StrIntrinsicNode::UL);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
+%{
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
+ ins_encode %{
+ __ string_compare_v($str1$$Register, $str2$$Register,
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register,
+ StrIntrinsicNode::LU);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// fast byte[] to char[] inflation
+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len,
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp)
+%{
+ predicate(UseRVV);
+ match(Set dummy (StrInflatedCopy src (Binary dst len)));
+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len);
+
+ format %{ "String Inflate $src,$dst" %}
+ ins_encode %{
+ __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// encode char[] to byte[] in ISO_8859_1
+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp)
+%{
+ predicate(UseRVV);
+ match(Set result (EncodeISOArray src (Binary dst len)));
+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp);
+
+ format %{ "Encode array $src,$dst,$len -> $result" %}
+ ins_encode %{
+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register,
+ $result$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// fast char[] to byte[] compression
+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp)
+%{
+ predicate(UseRVV);
+ match(Set result (StrCompressedCopy src (Binary dst len)));
+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp);
+
+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %}
+ ins_encode %{
+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register,
+ $result$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp)
+%{
+ predicate(UseRVV);
+ match(Set result (CountPositives ary len));
+ effect(USE_KILL ary, USE_KILL len, TEMP tmp);
+
+ format %{ "count positives byte[] $ary, $len -> $result" %}
+ ins_encode %{
+ __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3)
+%{
+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3);
+
+ format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %}
+
+ ins_encode %{
+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register,
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
+ false /* isL */);
+ %}
+
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3)
+%{
+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3);
+
+ format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %}
+
+ ins_encode %{
+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register,
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
+ true /* isL */);
+ %}
+
+ ins_pipe(pipe_class_memory);
+%}
+
+// clearing of an array
+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3)
+%{
+ predicate(UseRVV);
+ match(Set dummy (ClearArray cnt base));
+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3);
+
+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
+
+ ins_encode %{
+ __ clear_array_v($base$$Register, $cnt$$Register);
+ %}
+
+ ins_pipe(pipe_class_memory);
+%}
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f85d4b25a7669e4975905634ecba9dc620fb0df9
--- /dev/null
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -0,0 +1,2761 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "compiler/oopMap.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "logging/log.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/jniHandles.hpp"
+#include "runtime/safepointMechanism.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/align.hpp"
+#include "utilities/formatBuffer.hpp"
+#include "vmreg_riscv.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "adfiles/ad_riscv.hpp"
+#include "opto/runtime.hpp"
+#endif
+
+#define __ masm->
+
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
+
+class SimpleRuntimeFrame {
+public:
+
+ // Most of the runtime stubs have this simple frame layout.
+ // This class exists to make the layout shared in one place.
+ // Offsets are for compiler stack slots, which are jints.
+ enum layout {
+ // The frame sender code expects that fp will be in the "natural" place and
+ // will override any oopMap setting for it. We must therefore force the layout
+ // so that it agrees with the frame sender code.
+ // we don't expect any arg reg save area so riscv asserts that
+ // frame::arg_reg_save_area_bytes == 0
+ fp_off = 0, fp_off2,
+ return_off, return_off2,
+ framesize
+ };
+};
+
+class RegisterSaver {
+ const bool _save_vectors;
+ public:
+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {}
+ ~RegisterSaver() {}
+ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
+ void restore_live_registers(MacroAssembler* masm);
+
+ // Offsets into the register save area
+ // Used by deoptimization when it is managing result register
+ // values on its own
+ // gregs: 28, float_registers: 32; except: x1(ra), x2(sp), x3(gp) & x4(tp)
+ // |---v0---|<---SP
+ // |---v1---|save vectors only in generate_handler_blob
+ // |-- .. --|
+ // |---v31--|-----
+ // |---f0---|
+ // |---f1---|
+ // | .. |
+ // |---f31--|
+ // |---reserved slot for stack alignment---|
+ // |---x5---|
+ // | x6 |
+ // |---.. --|
+ // |---x31--|
+ // |---fp---|
+ // |---ra---|
+ int v0_offset_in_bytes(void) { return 0; }
+ int f0_offset_in_bytes(void) {
+ int f0_offset = 0;
+#ifdef COMPILER2
+ if (_save_vectors) {
+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers *
+ BytesPerInt;
+ }
+#endif
+ return f0_offset;
+ }
+ int reserved_slot_offset_in_bytes(void) {
+ return f0_offset_in_bytes() +
+ FloatRegisterImpl::max_slots_per_register *
+ FloatRegisterImpl::number_of_registers *
+ BytesPerInt;
+ }
+
+ int reg_offset_in_bytes(Register r) {
+ assert (r->encoding() > 4, "ra, sp, gp and tp not saved");
+ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize;
+ }
+
+ int freg_offset_in_bytes(FloatRegister f) {
+ return f0_offset_in_bytes() + f->encoding() * wordSize;
+ }
+
+ int ra_offset_in_bytes(void) {
+ return reserved_slot_offset_in_bytes() +
+ (RegisterImpl::number_of_registers - 3) *
+ RegisterImpl::max_slots_per_register *
+ BytesPerInt;
+ }
+};
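+// Illustrative layout example (a sketch, assuming 32 integer and 32 floating-point
+// registers with 2 jint slots each, and vectors not saved so f0_offset_in_bytes() == 0):
+// reserved_slot_offset_in_bytes() = 2 * 32 * 4 = 256,
+// reg_offset_in_bytes(x5) = 256 + (5 - 4) * 8 = 264, and
+// ra_offset_in_bytes() = 256 + (32 - 3) * 2 * 4 = 488.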
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+ int vector_size_in_bytes = 0;
+ int vector_size_in_slots = 0;
+#ifdef COMPILER2
+ if (_save_vectors) {
+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE);
+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT);
+ }
+#endif
+
+ assert_cond(masm != NULL && total_frame_words != NULL);
+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16);
+ // OopMap frame size is in compiler stack slots (jint's) not bytes or words
+ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
+ // The caller will allocate additional_frame_words
+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
+ // CodeBlob frame size is in words.
+ int frame_size_in_words = frame_size_in_bytes / wordSize;
+ *total_frame_words = frame_size_in_words;
+
+ // Save Integer, Float and Vector registers.
+ __ enter();
+ __ push_CPU_state(_save_vectors, vector_size_in_bytes);
+
+ // Set an oopmap for the call site. This oopmap will map all
+ // oop-registers and debug-info registers as callee-saved. This
+ // will allow deoptimization at this safepoint to find all possible
+ // debug-info recordings, as well as let GC find all oops.
+
+ OopMapSet *oop_maps = new OopMapSet();
+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
+ assert_cond(oop_maps != NULL && oop_map != NULL);
+
+ int sp_offset_in_slots = 0;
+ int step_in_slots = 0;
+ if (_save_vectors) {
+ step_in_slots = vector_size_in_slots;
+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
+ VectorRegister r = as_VectorRegister(i);
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg());
+ }
+ }
+
+ step_in_slots = FloatRegisterImpl::max_slots_per_register;
+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
+ FloatRegister r = as_FloatRegister(i);
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg());
+ }
+
+ step_in_slots = RegisterImpl::max_slots_per_register;
+ // skip the slot reserved for alignment, see MacroAssembler::push_reg;
+ // also skip x5 ~ x6 on the stack because they are caller-saved registers.
+ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3;
+ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack.
+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
+ Register r = as_Register(i);
+ if (r != xthread) {
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg());
+ }
+ }
+
+ return oop_map;
+}
+
+void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+ assert_cond(masm != NULL);
+#ifdef COMPILER2
+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE));
+#else
+ __ pop_CPU_state(_save_vectors);
+#endif
+ __ leave();
+}
+
+// Is the vector's size (in bytes) bigger than the size saved by default?
+// riscv does not overlay the floating-point registers on vector registers like aarch64 does.
+bool SharedRuntime::is_wide_vector(int size) {
+ return UseRVV;
+}
+
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no abi restrictions. Since we must observe abi restrictions
+// (like the placement of the register window) the slots must be biased by
+// the following value.
+static int reg2offset_in(VMReg r) {
+ // Account for saved fp and ra
+ // This should really be in_preserve_stack_slots
+ return r->reg2stack() * VMRegImpl::stack_slot_size;
+}
+
+static int reg2offset_out(VMReg r) {
+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go. Values in the VMRegPair regs array refer to 4-byte
+// quantities. Values less than VMRegImpl::stack0 are registers, those above
+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
+// as framesizes are fixed.
+// VMRegImpl::stack0 refers to the first slot 0(sp).
+// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Register
+// values up to RegisterImpl::number_of_registers map to the 64-bit
+// integer registers.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words,
+// which are 64-bit. The OUTPUTS are in 32-bit units.
+
+// The Java calling convention is a "shifted" version of the C ABI.
+// By skipping the first C ABI register we can call non-static jni
+// methods with small numbers of arguments without having to shuffle
+// the arguments at all. Since we control the java ABI we ought to at
+// least get some advantage out of it.
+
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+ VMRegPair *regs,
+ int total_args_passed) {
+ // Create the mapping between argument positions and
+ // registers.
+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
+ j_rarg0, j_rarg1, j_rarg2, j_rarg3,
+ j_rarg4, j_rarg5, j_rarg6, j_rarg7
+ };
+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
+ j_farg0, j_farg1, j_farg2, j_farg3,
+ j_farg4, j_farg5, j_farg6, j_farg7
+ };
+
+ uint int_args = 0;
+ uint fp_args = 0;
+ uint stk_args = 0; // inc by 2 each time
+
+ for (int i = 0; i < total_args_passed; i++) {
+ switch (sig_bt[i]) {
+ case T_BOOLEAN: // fall through
+ case T_CHAR: // fall through
+ case T_BYTE: // fall through
+ case T_SHORT: // fall through
+ case T_INT:
+ if (int_args < Argument::n_int_register_parameters_j) {
+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ case T_VOID:
+ // halves of T_LONG or T_DOUBLE
+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+ regs[i].set_bad();
+ break;
+ case T_LONG: // fall through
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+ case T_OBJECT: // fall through
+ case T_ARRAY: // fall through
+ case T_ADDRESS:
+ if (int_args < Argument::n_int_register_parameters_j) {
+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
+ } else {
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ case T_FLOAT:
+ if (fp_args < Argument::n_float_register_parameters_j) {
+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ case T_DOUBLE:
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+ if (fp_args < Argument::n_float_register_parameters_j) {
+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
+ } else {
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ return align_up(stk_args, 2);
+}
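+// Illustrative example (a sketch, not exercised by the code) for a Java signature
+// (int, long, double, Object), which arrives here as
+// { T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID, T_OBJECT }:
+//   i  sig_bt    assigned
+//   0  T_INT     j_rarg0
+//   1  T_LONG    j_rarg1
+//   2  T_VOID    (bad; second half of the long)
+//   3  T_DOUBLE  j_farg0
+//   4  T_VOID    (bad; second half of the double)
+//   5  T_OBJECT  j_rarg2
+// stk_args stays 0, so no outgoing stack slots are reserved.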
+
+// Patch the callers callsite with entry to compiled code if it exists.
+static void patch_callers_callsite(MacroAssembler *masm) {
+ assert_cond(masm != NULL);
+ Label L;
+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));
+ __ beqz(t0, L);
+
+ __ enter();
+ __ push_CPU_state();
+
+ // VM needs caller's callsite
+ // VM needs target method
+ // This needs to be a long call since we will relocate this adapter to
+ // the codeBuffer and it may not reach
+
+#ifndef PRODUCT
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+
+ __ mv(c_rarg0, xmethod);
+ __ mv(c_rarg1, ra);
+ int32_t offset = 0;
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset);
+ __ jalr(x1, t0, offset);
+
+ // Explicit fence.i required because fixup_callers_callsite may change the code
+ // stream.
+ __ safepoint_ifence();
+
+ __ pop_CPU_state();
+ // restore sp
+ __ leave();
+ __ bind(L);
+}
+
+static void gen_c2i_adapter(MacroAssembler *masm,
+ int total_args_passed,
+ int comp_args_on_stack,
+ const BasicType *sig_bt,
+ const VMRegPair *regs,
+ Label& skip_fixup) {
+ // Before we get into the guts of the C2I adapter, see if we should be here
+ // at all. We've come from compiled code and are attempting to jump to the
+ // interpreter, which means the caller made a static call to get here
+ // (vcalls always get a compiled target if there is one). Check for a
+ // compiled target. If there is one, we need to patch the caller's call.
+ patch_callers_callsite(masm);
+
+ __ bind(skip_fixup);
+
+ int words_pushed = 0;
+
+ // Since all args are passed on the stack, total_args_passed *
+ // Interpreter::stackElementSize is the space we need.
+
+ int extraspace = total_args_passed * Interpreter::stackElementSize;
+
+ __ mv(x30, sp);
+
+ // stack is aligned, keep it that way
+ extraspace = align_up(extraspace, 2 * wordSize);
+
+ if (extraspace) {
+ __ sub(sp, sp, extraspace);
+ }
+
+ // Now write the args into the outgoing interpreter space
+ for (int i = 0; i < total_args_passed; i++) {
+ if (sig_bt[i] == T_VOID) {
+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
+ continue;
+ }
+
+ // offset to start parameters
+ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
+ int next_off = st_off - Interpreter::stackElementSize;
+
+ // Say 4 args:
+ // i st_off
+ // 0 32 T_LONG
+ // 1 24 T_VOID
+ // 2 16 T_OBJECT
+ // 3 8 T_BOOL
+ // - 0 return address
+ //
+ // However, to make things extra confusing: because we can fit a Java long/double in
+ // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
+ // leaves one slot empty and only stores to a single slot. In this case the
+ // slot that is occupied is the T_VOID slot. (See, I said it was confusing.)
+
+ VMReg r_1 = regs[i].first();
+ VMReg r_2 = regs[i].second();
+ if (!r_1->is_valid()) {
+ assert(!r_2->is_valid(), "");
+ continue;
+ }
+ if (r_1->is_stack()) {
+ // memory to memory use t0
+ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
+ + extraspace
+ + words_pushed * wordSize);
+ if (!r_2->is_valid()) {
+ __ lwu(t0, Address(sp, ld_off));
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+ } else {
+ __ ld(t0, Address(sp, ld_off), /*temp register*/esp);
+
+ // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+ // T_DOUBLE and T_LONG use two slots in the interpreter
+ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+ // ld_off == LSW, ld_off+wordSize == MSW
+ // st_off == MSW, next_off == LSW
+ __ sd(t0, Address(sp, next_off), /*temp register*/esp);
+#ifdef ASSERT
+ // Overwrite the unused slot with known junk
+ __ li(t0, 0xdeadffffdeadaaaaul);
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+#endif /* ASSERT */
+ } else {
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+ }
+ }
+ } else if (r_1->is_Register()) {
+ Register r = r_1->as_Register();
+ if (!r_2->is_valid()) {
+ // must be only an int (or less) so move only 32 bits to slot
+ __ sd(r, Address(sp, st_off));
+ } else {
+ // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+ // T_DOUBLE and T_LONG use two slots in the interpreter
+ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+ // long/double in gpr
+#ifdef ASSERT
+ // Overwrite the unused slot with known junk
+ __ li(t0, 0xdeadffffdeadaaabul);
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+#endif /* ASSERT */
+ __ sd(r, Address(sp, next_off));
+ } else {
+ __ sd(r, Address(sp, st_off));
+ }
+ }
+ } else {
+ assert(r_1->is_FloatRegister(), "");
+ if (!r_2->is_valid()) {
+ // only a float; use just part of the slot
+ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off));
+ } else {
+#ifdef ASSERT
+ // Overwrite the unused slot with known junk
+ __ li(t0, 0xdeadffffdeadaaacul);
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+#endif /* ASSERT */
+ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off));
+ }
+ }
+ }
+
+ __ mv(esp, sp); // Interp expects args on caller's expression stack
+
+ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset())));
+ __ jr(t0);
+}
+
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+ int total_args_passed,
+ int comp_args_on_stack,
+ const BasicType *sig_bt,
+ const VMRegPair *regs) {
+ // Cut-out for having no stack args.
+ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord;
+ if (comp_args_on_stack != 0) {
+ __ sub(t0, sp, comp_words_on_stack * wordSize);
+ __ andi(sp, t0, -16);
+ }
+
+ // Will jump to the compiled code just as if compiled code was doing it.
+ // Pre-load the register-jump target early, to schedule it better.
+ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset())));
+
+ // Now generate the shuffle code.
+ for (int i = 0; i < total_args_passed; i++) {
+ if (sig_bt[i] == T_VOID) {
+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
+ continue;
+ }
+
+ // Pick up 0, 1 or 2 words from SP+offset.
+
+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+ "scrambled load targets?");
+ // Load in argument order going down.
+ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
+ // Point to interpreter value (vs. tag)
+ int next_off = ld_off - Interpreter::stackElementSize;
+
+ VMReg r_1 = regs[i].first();
+ VMReg r_2 = regs[i].second();
+ if (!r_1->is_valid()) {
+ assert(!r_2->is_valid(), "");
+ continue;
+ }
+ if (r_1->is_stack()) {
+ // Convert stack slot to an SP offset (+ wordSize to account for return address)
+ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
+ if (!r_2->is_valid()) {
+ __ lw(t0, Address(esp, ld_off));
+ __ sd(t0, Address(sp, st_off), /*temp register*/t2);
+ } else {
+ //
+ // We are using two optoregs. This can be either T_OBJECT,
+ // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
+ // two slots but only uses one for the T_LONG or T_DOUBLE case,
+ // so we must adjust where to pick up the data to match the
+ // interpreter.
+ //
+ // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
+ // are accessed at negative offsets, so the LSW is at the LOW address
+
+ // ld_off is MSW so get LSW
+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
+ next_off : ld_off;
+ __ ld(t0, Address(esp, offset));
+ // st_off is LSW (i.e. reg.first())
+ __ sd(t0, Address(sp, st_off), /*temp register*/t2);
+ }
+ } else if (r_1->is_Register()) { // Register argument
+ Register r = r_1->as_Register();
+ if (r_2->is_valid()) {
+ //
+ // We are using two VMRegs. This can be either T_OBJECT,
+ // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
+ // two slots but only uses one for the T_LONG or T_DOUBLE case,
+ // so we must adjust where to pick up the data to match the
+ // interpreter.
+
+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
+ next_off : ld_off;
+
+ // this can be a misaligned move
+ __ ld(r, Address(esp, offset));
+ } else {
+ // sign extend and use a full word?
+ __ lw(r, Address(esp, ld_off));
+ }
+ } else {
+ if (!r_2->is_valid()) {
+ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off));
+ } else {
+ __ fld(r_1->as_FloatRegister(), Address(esp, next_off));
+ }
+ }
+ }
+
+ // 6243940 We might end up in handle_wrong_method if
+ // the callee is deoptimized as we race through here. If that
+ // happens we don't want to take a safepoint because the
+ // caller frame will look interpreted and arguments are now
+ // "compiled" so it is much better to make this transition
+ // invisible to the stack walking code. Unfortunately if
+ // we try to find the callee by normal means a safepoint
+ // is possible. So we stash the desired callee in the thread
+ // and the VM will find it there should this case occur.
+
+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset()));
+
+ __ jr(t1);
+}
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+ int total_args_passed,
+ int comp_args_on_stack,
+ const BasicType *sig_bt,
+ const VMRegPair *regs,
+ AdapterFingerPrint* fingerprint) {
+ address i2c_entry = __ pc();
+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+ address c2i_unverified_entry = __ pc();
+ Label skip_fixup;
+
+ Label ok;
+
+ const Register holder = t1;
+ const Register receiver = j_rarg0;
+ const Register tmp = t2; // A call-clobbered register not used for arg passing
+
+ // -------------------------------------------------------------------------
+ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls
+ // to the interpreter. The args start out packed in the compiled layout. They
+ // need to be unpacked into the interpreter layout. This will almost always
+ // require some stack space. We grow the current (compiled) stack, then repack
+ // the args. We finally end in a jump to the generic interpreter entry point.
+ // On exit from the interpreter, the interpreter will restore our SP (lest the
+ // compiled code, which relies solely on SP and not FP, get sick).
+
+ {
+ __ block_comment("c2i_unverified_entry {");
+ __ load_klass(t0, receiver);
+ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset()));
+ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset()));
+ __ beq(t0, tmp, ok);
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+ __ bind(ok);
+ // Method might have been compiled since the call site was patched to
+ // interpreted; if that is the case treat it as a miss so we can get
+ // the call site corrected.
+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));
+ __ beqz(t0, skip_fixup);
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ block_comment("} c2i_unverified_entry");
+ }
+
+ address c2i_entry = __ pc();
+
+ // Class initialization barrier for static methods
+ address c2i_no_clinit_check_entry = NULL;
+ if (VM_Version::supports_fast_class_init_checks()) {
+ Label L_skip_barrier;
+
+ { // Bypass the barrier for non-static methods
+ __ lwu(t0, Address(xmethod, Method::access_flags_offset()));
+ __ andi(t1, t0, JVM_ACC_STATIC);
+ __ beqz(t1, L_skip_barrier); // non-static
+ }
+
+ __ load_method_holder(t1, xmethod);
+ __ clinit_barrier(t1, t0, &L_skip_barrier);
+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+
+ __ bind(L_skip_barrier);
+ c2i_no_clinit_check_entry = __ pc();
+ }
+
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->c2i_entry_barrier(masm);
+
+ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+ __ flush();
+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
+}
+
+int SharedRuntime::vector_calling_convention(VMRegPair *regs,
+ uint num_bits,
+ uint total_args_passed) {
+ Unimplemented();
+ return 0;
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+ VMRegPair *regs,
+ VMRegPair *regs2,
+ int total_args_passed) {
+ assert(regs2 == NULL, "not needed on riscv");
+
+ // We return the amount of VMRegImpl stack slots we need to reserve for all
+ // the arguments NOT counting out_preserve_stack_slots.
+
+ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
+ c_rarg0, c_rarg1, c_rarg2, c_rarg3,
+ c_rarg4, c_rarg5, c_rarg6, c_rarg7
+ };
+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
+ c_farg0, c_farg1, c_farg2, c_farg3,
+ c_farg4, c_farg5, c_farg6, c_farg7
+ };
+
+ uint int_args = 0;
+ uint fp_args = 0;
+ uint stk_args = 0; // inc by 2 each time
+
+ for (int i = 0; i < total_args_passed; i++) {
+ switch (sig_bt[i]) {
+ case T_BOOLEAN: // fall through
+ case T_CHAR: // fall through
+ case T_BYTE: // fall through
+ case T_SHORT: // fall through
+ case T_INT:
+ if (int_args < Argument::n_int_register_parameters_c) {
+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ case T_LONG: // fall through
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+ case T_OBJECT: // fall through
+ case T_ARRAY: // fall through
+ case T_ADDRESS: // fall through
+ case T_METADATA:
+ if (int_args < Argument::n_int_register_parameters_c) {
+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
+ } else {
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ case T_FLOAT:
+ if (fp_args < Argument::n_float_register_parameters_c) {
+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
+ } else if (int_args < Argument::n_int_register_parameters_c) {
+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
+ } else {
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ case T_DOUBLE:
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+ if (fp_args < Argument::n_float_register_parameters_c) {
+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
+ } else if (int_args < Argument::n_int_register_parameters_c) {
+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
+ } else {
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
+ stk_args += 2;
+ }
+ break;
+ case T_VOID: // Halves of longs and doubles
+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+ regs[i].set_bad();
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ return stk_args;
+}
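+// Unlike the Java convention above, the C convention lets a T_FLOAT or T_DOUBLE fall
+// back to the next free integer argument register once the eight FP argument
+// registers are exhausted (the int_args branches above), in line with the riscv64 C
+// ABI; only when both register files are used up does the value go to the stack.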
+
+// On 64-bit we will store integer-like items to the stack as
+// 64-bit items (riscv64 ABI) even though Java would only store
+// 32 bits for a parameter. On 32-bit it would simply be 32 bits,
+// so this routine does 32->32 on 32-bit and 32->64 on 64-bit.
+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+ assert_cond(masm != NULL);
+ if (src.first()->is_stack()) {
+ if (dst.first()->is_stack()) {
+ // stack to stack
+ __ ld(t0, Address(fp, reg2offset_in(src.first())));
+ __ sd(t0, Address(sp, reg2offset_out(dst.first())));
+ } else {
+ // stack to reg
+ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
+ }
+ } else if (dst.first()->is_stack()) {
+ // reg to stack
+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
+ } else {
+ if (dst.first() != src.first()) {
+ // sign extend the 32-bit value
+ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr);
+ }
+ }
+}
+
+// An oop arg. Must pass a handle not the oop itself
+static void object_move(MacroAssembler* masm,
+ OopMap* map,
+ int oop_handle_offset,
+ int framesize_in_slots,
+ VMRegPair src,
+ VMRegPair dst,
+ bool is_receiver,
+ int* receiver_offset) {
+ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL);
+ // must pass a handle. First figure out the location we use as a handle
+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
+
+ // See if the oop is NULL; if it is, we need no handle
+
+ if (src.first()->is_stack()) {
+
+ // Oop is already on the stack as an argument
+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
+ if (is_receiver) {
+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
+ }
+
+ __ ld(t0, Address(fp, reg2offset_in(src.first())));
+ __ la(rHandle, Address(fp, reg2offset_in(src.first())));
+ // conditionally move a NULL
+ Label notZero1;
+ __ bnez(t0, notZero1);
+ __ mv(rHandle, zr);
+ __ bind(notZero1);
+ } else {
+
+ // Oop is in a register; we must store it to the space we reserve
+ // on the stack for oop_handles and pass a handle if the oop is non-NULL
+
+ const Register rOop = src.first()->as_Register();
+ int oop_slot = -1;
+ if (rOop == j_rarg0) {
+ oop_slot = 0;
+ } else if (rOop == j_rarg1) {
+ oop_slot = 1;
+ } else if (rOop == j_rarg2) {
+ oop_slot = 2;
+ } else if (rOop == j_rarg3) {
+ oop_slot = 3;
+ } else if (rOop == j_rarg4) {
+ oop_slot = 4;
+ } else if (rOop == j_rarg5) {
+ oop_slot = 5;
+ } else if (rOop == j_rarg6) {
+ oop_slot = 6;
+ } else {
+ assert(rOop == j_rarg7, "wrong register");
+ oop_slot = 7;
+ }
+
+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
+ int offset = oop_slot * VMRegImpl::stack_slot_size;
+
+ map->set_oop(VMRegImpl::stack2reg(oop_slot));
+ // Store oop in handle area, may be NULL
+ __ sd(rOop, Address(sp, offset));
+ if (is_receiver) {
+ *receiver_offset = offset;
+ }
+
+ // rOop may be the same as rHandle
+ if (rOop == rHandle) {
+ Label isZero;
+ __ beqz(rOop, isZero);
+ __ la(rHandle, Address(sp, offset));
+ __ bind(isZero);
+ } else {
+ Label notZero2;
+ __ la(rHandle, Address(sp, offset));
+ __ bnez(rOop, notZero2);
+ __ mv(rHandle, zr);
+ __ bind(notZero2);
+ }
+ }
+
+ // If the arg is on the stack then place it there, otherwise it is already in the correct reg.
+ if (dst.first()->is_stack()) {
+ __ sd(rHandle, Address(sp, reg2offset_out(dst.first())));
+ }
+}
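+// In short: object_move records the oop's stack location in the OopMap and passes
+// the address of that location (or zero for a NULL oop) as the JNI handle, reusing
+// either the caller's own stack slot or a slot in the reserved oop-handle area.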
+
+// A float arg may have to do a float reg to int reg conversion
+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+ assert(src.first()->is_stack() && dst.first()->is_stack() ||
+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
+ assert_cond(masm != NULL);
+ if (src.first()->is_stack()) {
+ if (dst.first()->is_stack()) {
+ __ lwu(t0, Address(fp, reg2offset_in(src.first())));
+ __ sw(t0, Address(sp, reg2offset_out(dst.first())));
+ } else if (dst.first()->is_Register()) {
+ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
+ } else {
+ ShouldNotReachHere();
+ }
+ } else if (src.first() != dst.first()) {
+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
+ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+}
+
+// A long move
+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+ assert_cond(masm != NULL);
+ if (src.first()->is_stack()) {
+ if (dst.first()->is_stack()) {
+ // stack to stack
+ __ ld(t0, Address(fp, reg2offset_in(src.first())));
+ __ sd(t0, Address(sp, reg2offset_out(dst.first())));
+ } else {
+ // stack to reg
+ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
+ }
+ } else if (dst.first()->is_stack()) {
+ // reg to stack
+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
+ } else {
+ if (dst.first() != src.first()) {
+ __ mv(dst.first()->as_Register(), src.first()->as_Register());
+ }
+ }
+}
+
+// A double move
+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+ assert(src.first()->is_stack() && dst.first()->is_stack() ||
+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
+ assert_cond(masm != NULL);
+ if (src.first()->is_stack()) {
+ if (dst.first()->is_stack()) {
+ __ ld(t0, Address(fp, reg2offset_in(src.first())));
+ __ sd(t0, Address(sp, reg2offset_out(dst.first())));
+ } else if (dst.first()-> is_Register()) {
+ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
+ } else {
+ ShouldNotReachHere();
+ }
+ } else if (src.first() != dst.first()) {
+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
+ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+}
+
+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+ assert_cond(masm != NULL);
+ // We always ignore the frame_slots arg and just use the space below the frame pointer,
+ // which by this time is free to use
+ switch (ret_type) {
+ case T_FLOAT:
+ __ fsw(f10, Address(fp, -3 * wordSize));
+ break;
+ case T_DOUBLE:
+ __ fsd(f10, Address(fp, -3 * wordSize));
+ break;
+ case T_VOID: break;
+ default: {
+ __ sd(x10, Address(fp, -3 * wordSize));
+ }
+ }
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+ assert_cond(masm != NULL);
+ // We always ignore the frame_slots arg and just use the space below the frame pointer,
+ // which by this time is free to use
+ switch (ret_type) {
+ case T_FLOAT:
+ __ flw(f10, Address(fp, -3 * wordSize));
+ break;
+ case T_DOUBLE:
+ __ fld(f10, Address(fp, -3 * wordSize));
+ break;
+ case T_VOID: break;
+ default: {
+ __ ld(x10, Address(fp, -3 * wordSize));
+ }
+ }
+}
+
+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
+ assert_cond(masm != NULL && args != NULL);
+ RegSet x;
+ for ( int i = first_arg ; i < arg_count ; i++ ) {
+ if (args[i].first()->is_Register()) {
+ x = x + args[i].first()->as_Register();
+ } else if (args[i].first()->is_FloatRegister()) {
+ __ addi(sp, sp, -2 * wordSize);
+ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0));
+ }
+ }
+ __ push_reg(x, sp);
+}
+
+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
+ assert_cond(masm != NULL && args != NULL);
+ RegSet x;
+ for ( int i = first_arg ; i < arg_count ; i++ ) {
+ if (args[i].first()->is_Register()) {
+ x = x + args[i].first()->as_Register();
+ } else {
+ ;
+ }
+ }
+ __ pop_reg(x, sp);
+ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
+ if (args[i].first()->is_Register()) {
+ ;
+ } else if (args[i].first()->is_FloatRegister()) {
+ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0));
+ __ add(sp, sp, 2 * wordSize);
+ }
+ }
+}
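+// Note: save_args spills each FP argument into its own 16-byte slot (presumably to
+// keep sp 16-byte aligned) before pushing the integer argument registers as a set;
+// restore_args undoes this in the opposite order, popping the integer set first and
+// then walking the FP arguments in reverse.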
+
+static void rt_call(MacroAssembler* masm, address dest) {
+ assert_cond(masm != NULL);
+ CodeBlob *cb = CodeCache::find_blob(dest);
+ if (cb) {
+ __ far_call(RuntimeAddress(dest));
+ } else {
+ int32_t offset = 0;
+ __ la_patchable(t0, RuntimeAddress(dest), offset);
+ __ jalr(x1, t0, offset);
+ }
+}
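+// Note: rt_call uses far_call when the destination resolves to a CodeBlob in the
+// code cache, and otherwise materializes the address with la_patchable and calls
+// indirectly through t0 with jalr.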
+
+static void verify_oop_args(MacroAssembler* masm,
+ const methodHandle& method,
+ const BasicType* sig_bt,
+ const VMRegPair* regs) {
+ const Register temp_reg = x9; // not part of any compiled calling seq
+ if (VerifyOops) {
+ for (int i = 0; i < method->size_of_parameters(); i++) {
+ if (sig_bt[i] == T_OBJECT ||
+ sig_bt[i] == T_ARRAY) {
+ VMReg r = regs[i].first();
+ assert(r->is_valid(), "bad oop arg");
+ if (r->is_stack()) {
+ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+ __ verify_oop(temp_reg);
+ } else {
+ __ verify_oop(r->as_Register());
+ }
+ }
+ }
+ }
+}
+
+static void gen_special_dispatch(MacroAssembler* masm,
+ const methodHandle& method,
+ const BasicType* sig_bt,
+ const VMRegPair* regs) {
+ verify_oop_args(masm, method, sig_bt, regs);
+ vmIntrinsics::ID iid = method->intrinsic_id();
+
+ // Now write the args into the outgoing interpreter space
+ bool has_receiver = false;
+ Register receiver_reg = noreg;
+ int member_arg_pos = -1;
+ Register member_reg = noreg;
+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
+ if (ref_kind != 0) {
+ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
+ member_reg = x9; // known to be free at this point
+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) {
+ has_receiver = true;
+ } else {
+ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
+ }
+
+ if (member_reg != noreg) {
+ // Load the member_arg into register, if necessary.
+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
+ VMReg r = regs[member_arg_pos].first();
+ if (r->is_stack()) {
+ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+ } else {
+ // no data motion is needed
+ member_reg = r->as_Register();
+ }
+ }
+
+ if (has_receiver) {
+ // Make sure the receiver is loaded into a register.
+ assert(method->size_of_parameters() > 0, "oob");
+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+ VMReg r = regs[0].first();
+ assert(r->is_valid(), "bad receiver arg");
+ if (r->is_stack()) {
+ // Porting note: This assumes that compiled calling conventions always
+ // pass the receiver oop in a register. If this is not true on some
+ // platform, pick a temp and load the receiver from stack.
+ fatal("receiver always in a register");
+ receiver_reg = x12; // known to be free at this point
+ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+ } else {
+ // no data motion is needed
+ receiver_reg = r->as_Register();
+ }
+ }
+
+ // Figure out which address we are really jumping to:
+ MethodHandles::generate_method_handle_dispatch(masm, iid,
+ receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
+
+// ---------------------------------------------------------------------------
+// Generate a native wrapper for a given method. The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// convention (handlizes oops, etc), transitions to native, makes the call,
+// returns to java state (possibly blocking), unhandlizes any result and
+// returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions. The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GCLocker
+// lock_critical/unlock_critical semantics are followed. Some other
+// parts of JNI setup are skipped, like the tear-down of the JNI handle
+// block and the check for pending exceptions, since it's impossible for them
+// to be thrown.
+//
+// They are roughly structured like this:
+// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical()
+// transition to thread_in_native
+// unpack array arguments and call native entry point
+// check for safepoint in progress
+// check if any thread suspend flags are set
+// call into JVM and possibly unlock the JNI critical
+// if a GC was suppressed while in the critical native.
+// transition back to thread_in_Java
+// return to caller
+//
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+ const methodHandle& method,
+ int compile_id,
+ BasicType* in_sig_bt,
+ VMRegPair* in_regs,
+ BasicType ret_type) {
+ if (method->is_method_handle_intrinsic()) {
+ vmIntrinsics::ID iid = method->intrinsic_id();
+ intptr_t start = (intptr_t)__ pc();
+ int vep_offset = ((intptr_t)__ pc()) - start;
+
+ // First instruction must be a nop as it may need to be patched on deoptimisation
+ __ nop();
+ gen_special_dispatch(masm,
+ method,
+ in_sig_bt,
+ in_regs);
+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
+ __ flush();
+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
+ return nmethod::new_native_nmethod(method,
+ compile_id,
+ masm->code(),
+ vep_offset,
+ frame_complete,
+ stack_slots / VMRegImpl::slots_per_word,
+ in_ByteSize(-1),
+ in_ByteSize(-1),
+ (OopMapSet*)NULL);
+ }
+ address native_func = method->native_function();
+ assert(native_func != NULL, "must have function");
+
+ // An OopMap for lock (and class if static)
+ OopMapSet *oop_maps = new OopMapSet();
+ assert_cond(oop_maps != NULL);
+ intptr_t start = (intptr_t)__ pc();
+
+ // We have received a description of where all the java args are located
+ // on entry to the wrapper. We need to convert these args to where
+ // the jni function will expect them. To figure out where they go
+ // we convert the java signature to a C signature by inserting
+ // the hidden arguments as arg[0] and possibly arg[1] (static method)
+
+ const int total_in_args = method->size_of_parameters();
+ int total_c_args = total_in_args + (method->is_static() ? 2 : 1);
+
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+ BasicType* in_elem_bt = NULL;
+
+ int argc = 0;
+ out_sig_bt[argc++] = T_ADDRESS;
+ if (method->is_static()) {
+ out_sig_bt[argc++] = T_OBJECT;
+ }
+
+ for (int i = 0; i < total_in_args ; i++) {
+ out_sig_bt[argc++] = in_sig_bt[i];
+ }
+
+ // Now figure out where the args must be stored and how much stack space
+ // they require.
+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+ // Compute framesize for the wrapper. We need to handlize all oops in
+ // incoming registers
+
+ // Calculate the total number of stack slots we will need.
+
+ // First count the abi requirement plus all of the outgoing args
+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
+
+ // Now the space for the inbound oop handle area
+ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers
+
+ int oop_handle_offset = stack_slots;
+ stack_slots += total_save_slots;
+
+ // Now any space we need for handlizing a klass if static method
+
+ int klass_slot_offset = 0;
+ int klass_offset = -1;
+ int lock_slot_offset = 0;
+ bool is_static = false;
+
+ if (method->is_static()) {
+ klass_slot_offset = stack_slots;
+ stack_slots += VMRegImpl::slots_per_word;
+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
+ is_static = true;
+ }
+
+ // Plus a lock if needed
+
+ if (method->is_synchronized()) {
+ lock_slot_offset = stack_slots;
+ stack_slots += VMRegImpl::slots_per_word;
+ }
+
+ // Now a place (+2) to save return values or temp during shuffling
+ // + 4 for return address (which we own) and saved fp
+ stack_slots += 6;
+
+ // Ok The space we have allocated will look like:
+ //
+ //
+ // FP-> | |
+ // | 2 slots (ra) |
+ // | 2 slots (fp) |
+ // |---------------------|
+ // | 2 slots for moves |
+ // |---------------------|
+ // | lock box (if sync) |
+ // |---------------------| <- lock_slot_offset
+ // | klass (if static) |
+ // |---------------------| <- klass_slot_offset
+ // | oopHandle area |
+ // |---------------------| <- oop_handle_offset (8 java arg registers)
+ // | outbound memory |
+ // | based arguments |
+ // | |
+ // |---------------------|
+ // | |
+ // SP-> | out_preserved_slots |
+ //
+ //
+
+
+ // Now compute actual number of stack words we need rounding to make
+ // stack properly aligned.
+ stack_slots = align_up(stack_slots, StackAlignmentInSlots);
+
+ int stack_size = stack_slots * VMRegImpl::stack_slot_size;
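+  // A worked sizing example (illustrative only; real values depend on the method):
+  // for a non-static, non-synchronized native whose C arguments all fit in registers,
+  // out_arg_slots == 0, so stack_slots = 0 + 0 + 16 (oop handle area) + 6 = 22,
+  // which aligns up to 24 slots (assuming 16-byte stack alignment), i.e. 96 bytes.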
+
+ // First thing make an ic check to see if we should even be here
+
+ // We are free to use all registers as temps without saving them and
+ // restoring them except fp. fp is the only callee save register
+ // as far as the interpreter and the compiler(s) are concerned.
+
+
+ const Register ic_reg = t1;
+ const Register receiver = j_rarg0;
+
+ Label hit;
+ Label exception_pending;
+
+ assert_different_registers(ic_reg, receiver, t0);
+ __ verify_oop(receiver);
+ __ cmp_klass(receiver, ic_reg, t0, hit);
+
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+ // Verified entry point must be aligned
+ __ align(8);
+
+ __ bind(hit);
+
+ int vep_offset = ((intptr_t)__ pc()) - start;
+
+ // If we have to make this method not-entrant we'll overwrite its
+ // first instruction with a jump.
+ __ nop();
+
+ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
+ Label L_skip_barrier;
+ __ mov_metadata(t1, method->method_holder()); // InstanceKlass*
+ __ clinit_barrier(t1, t0, &L_skip_barrier);
+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+
+ __ bind(L_skip_barrier);
+ }
+
+ // Generate stack overflow check
+  __ bang_stack_with_offset(checked_cast<int>(StackOverflow::stack_shadow_zone_size()));
+
+ // Generate a new frame for the wrapper.
+ __ enter();
+ // -2 because return address is already present and so is saved fp
+ __ sub(sp, sp, stack_size - 2 * wordSize);
+
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ assert_cond(bs != NULL);
+ bs->nmethod_entry_barrier(masm);
+
+ // Frame is now completed as far as size and linkage.
+ int frame_complete = ((intptr_t)__ pc()) - start;
+
+ // We use x18 as the oop handle for the receiver/klass
+ // It is callee save so it survives the call to native
+
+ const Register oop_handle_reg = x18;
+
+ //
+ // We immediately shuffle the arguments so that any vm call we have to
+ // make from here on out (sync slow path, jvmti, etc.) we will have
+ // captured the oops from our caller and have a valid oopMap for
+ // them.
+
+ // -----------------
+ // The Grand Shuffle
+
+  // The Java calling convention is either equal to (linux) or denser than (win64) the
+  // c calling convention. However, because of the jni_env argument the c calling
+ // convention always has at least one more (and two for static) arguments than Java.
+ // Therefore if we move the args from java -> c backwards then we will never have
+ // a register->register conflict and we don't have to build a dependency graph
+ // and figure out how to break any cycles.
+ //
+
+ // Record esp-based slot for receiver on stack for non-static methods
+ int receiver_offset = -1;
+
+ // This is a trick. We double the stack slots so we can claim
+ // the oops in the caller's frame. Since we are sure to have
+  // more args than the caller, doubling is enough to make
+ // sure we can capture all the incoming oop args from the
+ // caller.
+ //
+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+ assert_cond(map != NULL);
+
+ int float_args = 0;
+ int int_args = 0;
+
+#ifdef ASSERT
+ bool reg_destroyed[RegisterImpl::number_of_registers];
+ bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+ reg_destroyed[r] = false;
+ }
+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+ freg_destroyed[f] = false;
+ }
+
+#endif /* ASSERT */
+
+ // For JNI natives the incoming and outgoing registers are offset upwards.
+  GrowableArray<int> arg_order(2 * total_in_args);
+ VMRegPair tmp_vmreg;
+ tmp_vmreg.set2(x9->as_VMReg());
+
+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
+ arg_order.push(i);
+ arg_order.push(c_arg);
+ }
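+  // For example (illustrative), with total_in_args == 2 and total_c_args == 3
+  // (an instance method, one hidden JNIEnv* slot) the loop above records the
+  // moves as (1 -> 2), (0 -> 1): shuffling backwards means each Java source is
+  // consumed before a later move could overwrite it.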
+
+ int temploc = -1;
+ for (int ai = 0; ai < arg_order.length(); ai += 2) {
+ int i = arg_order.at(ai);
+ int c_arg = arg_order.at(ai + 1);
+ __ block_comment(err_msg("mv %d -> %d", i, c_arg));
+ assert(c_arg != -1 && i != -1, "wrong order");
+#ifdef ASSERT
+ if (in_regs[i].first()->is_Register()) {
+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
+ } else if (in_regs[i].first()->is_FloatRegister()) {
+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
+ }
+ if (out_regs[c_arg].first()->is_Register()) {
+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+ } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+ }
+#endif /* ASSERT */
+ switch (in_sig_bt[i]) {
+ case T_ARRAY:
+ case T_OBJECT:
+ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
+ ((i == 0) && (!is_static)),
+ &receiver_offset);
+ int_args++;
+ break;
+ case T_VOID:
+ break;
+
+ case T_FLOAT:
+ float_move(masm, in_regs[i], out_regs[c_arg]);
+ float_args++;
+ break;
+
+ case T_DOUBLE:
+ assert( i + 1 < total_in_args &&
+ in_sig_bt[i + 1] == T_VOID &&
+ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list");
+ double_move(masm, in_regs[i], out_regs[c_arg]);
+ float_args++;
+ break;
+
+ case T_LONG :
+ long_move(masm, in_regs[i], out_regs[c_arg]);
+ int_args++;
+ break;
+
+ case T_ADDRESS:
+ assert(false, "found T_ADDRESS in java args");
+ break;
+
+ default:
+ move32_64(masm, in_regs[i], out_regs[c_arg]);
+ int_args++;
+ }
+ }
+
+ // point c_arg at the first arg that is already loaded in case we
+ // need to spill before we call out
+ int c_arg = total_c_args - total_in_args;
+
+ // Pre-load a static method's oop into c_rarg1.
+ if (method->is_static()) {
+
+ // load oop into a register
+ __ movoop(c_rarg1,
+ JNIHandles::make_local(method->method_holder()->java_mirror()),
+ /*immediate*/true);
+
+    // Now handlize the static class mirror; it's known not-null.
+ __ sd(c_rarg1, Address(sp, klass_offset));
+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
+
+ // Now get the handle
+ __ la(c_rarg1, Address(sp, klass_offset));
+ // and protect the arg if we must spill
+ c_arg--;
+ }
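+  // After this block (illustrative), a hypothetical `static native void m(int x)`
+  // reaches its C entry point as `void Java_..._m(JNIEnv* env, jclass clazz, jint x)`,
+  // where clazz is the stack-allocated handle created above rather than a raw oop.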
+
+ // Change state to native (we save the return address in the thread, since it might not
+ // be pushed on the stack when we do a stack traversal).
+ // We use the same pc/oopMap repeatedly when we call out
+
+ Label native_return;
+ __ set_last_Java_frame(sp, noreg, native_return, t0);
+
+ Label dtrace_method_entry, dtrace_method_entry_done;
+ {
+ int32_t offset = 0;
+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
+ __ lbu(t0, Address(t0, offset));
+ __ addw(t0, t0, zr);
+ __ bnez(t0, dtrace_method_entry);
+ __ bind(dtrace_method_entry_done);
+ }
+
+ // RedefineClasses() tracing support for obsolete method entry
+ if (log_is_enabled(Trace, redefine, class, obsolete)) {
+ // protect the args we've loaded
+ save_args(masm, total_c_args, c_arg, out_regs);
+ __ mov_metadata(c_rarg1, method());
+ __ call_VM_leaf(
+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+ xthread, c_rarg1);
+ restore_args(masm, total_c_args, c_arg, out_regs);
+ }
+
+ // Lock a synchronized method
+
+ // Register definitions used by locking and unlocking
+
+ const Register swap_reg = x10;
+ const Register obj_reg = x9; // Will contain the oop
+ const Register lock_reg = x30; // Address of compiler lock object (BasicLock)
+ const Register old_hdr = x30; // value of old header at unlock time
+ const Register tmp = ra;
+
+ Label slow_path_lock;
+ Label lock_done;
+
+ if (method->is_synchronized()) {
+
+ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
+
+ // Get the handle (the 2nd argument)
+ __ mv(oop_handle_reg, c_rarg1);
+
+ // Get address of the box
+
+ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+
+ // Load the oop from the handle
+ __ ld(obj_reg, Address(oop_handle_reg, 0));
+
+ if (!UseHeavyMonitors) {
+      // Load (object->mark() | 1) into swap_reg (x10)
+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+ __ ori(swap_reg, t0, 1);
+
+ // Save (object->mark() | 1) into BasicLock's displaced header
+ __ sd(swap_reg, Address(lock_reg, mark_word_offset));
+
+ // src -> dest if dest == x10 else x10 <- dest
+ {
+ Label here;
+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL);
+ }
+
+ // Test if the oopMark is an obvious stack pointer, i.e.,
+ // 1) (mark & 3) == 0, and
+ // 2) sp <= mark < mark + os::pagesize()
+ // These 3 tests can be done by evaluating the following
+ // expression: ((mark - sp) & (3 - os::vm_page_size())),
+ // assuming both stack pointer and pagesize have their
+ // least significant 2 bits clear.
+      // NOTE: the oopMark is in swap_reg (x10) as the result of cmpxchg
+
+ __ sub(swap_reg, swap_reg, sp);
+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size());
+
+ // Save the test result, for recursive case, the result is zero
+ __ sd(swap_reg, Address(lock_reg, mark_word_offset));
+ __ bnez(swap_reg, slow_path_lock);
+ } else {
+ __ j(slow_path_lock);
+ }
+
+ // Slow path will re-enter here
+ __ bind(lock_done);
+ }
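+  // Rough pseudo code for the stack-lock fast path generated above (illustrative
+  // only; field and helper names are approximate, not the exact VM API):
+  //   markWord mark = obj->mark() | 1;            // assume unlocked
+  //   lock->set_displaced_header(mark);
+  //   if (!CAS(&obj->mark(), mark, lock)) {       // try to install the stack lock
+  //     if (((mark - sp) & (3 - page_size)) != 0) goto slow_path;  // not recursive
+  //     lock->set_displaced_header(0);            // recursive: record zero
+  //   }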
+
+
+ // Finally just about ready to make the JNI call
+
+ // get JNIEnv* which is first argument to native
+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset())));
+
+ // Now set thread in native
+ __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
+ __ mv(t0, _thread_in_native);
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ __ sw(t0, Address(t1));
+
+ rt_call(masm, native_func);
+
+ __ bind(native_return);
+
+ intptr_t return_pc = (intptr_t) __ pc();
+ oop_maps->add_gc_map(return_pc - start, map);
+
+ // Unpack native results.
+ if (ret_type != T_OBJECT && ret_type != T_ARRAY) {
+ __ cast_primitive_type(ret_type, x10);
+ }
+
+ Label safepoint_in_progress, safepoint_in_progress_done;
+ Label after_transition;
+
+ // Switch thread to "native transition" state before reading the synchronization state.
+ // This additional state is necessary because reading and testing the synchronization
+ // state is not atomic w.r.t. GC, as this scenario demonstrates:
+ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+ // VM thread changes sync state to synchronizing and suspends threads for GC.
+ // Thread A is resumed to finish this native method, but doesn't block here since it
+  //    didn't see any synchronization in progress, and escapes.
+ __ mv(t0, _thread_in_native_trans);
+
+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
+
+ // Force this write out before the read below
+ __ membar(MacroAssembler::AnyAny);
+
+ // check for safepoint operation in progress and/or pending suspend requests
+ {
+ // We need an acquire here to ensure that any subsequent load of the
+ // global SafepointSynchronize::_state flag is ordered after this load
+ // of the thread-local polling word. We don't want this poll to
+ // return false (i.e. not safepointing) and a later poll of the global
+ // SafepointSynchronize::_state spuriously to return true.
+ // This is to avoid a race when we're in a native->Java transition
+ // racing the code which wakes up from a safepoint.
+
+ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */);
+ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset()));
+ __ bnez(t0, safepoint_in_progress);
+ __ bind(safepoint_in_progress_done);
+ }
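+  // Approximate shape of this transition in C-like pseudo code (illustrative;
+  // helper names are approximations, not the exact VM API):
+  //   thread->set_thread_state(_thread_in_native_trans);
+  //   full_fence();
+  //   if (safepoint_poll_armed(thread) || thread->suspend_flags() != 0) {
+  //     JavaThread::check_special_condition_for_native_trans(thread);  // may block
+  //   }
+  //   thread->set_thread_state(_thread_in_Java);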
+
+ // change thread state
+ __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
+ __ mv(t0, _thread_in_Java);
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ __ sw(t0, Address(t1));
+ __ bind(after_transition);
+
+ Label reguard;
+ Label reguard_done;
+ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset()));
+ __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled);
+ __ beq(t0, t1, reguard);
+ __ bind(reguard_done);
+
+ // native result if any is live
+
+ // Unlock
+ Label unlock_done;
+ Label slow_path_unlock;
+ if (method->is_synchronized()) {
+
+ // Get locked oop from the handle we passed to jni
+ __ ld(obj_reg, Address(oop_handle_reg, 0));
+
+ Label done;
+
+ if (!UseHeavyMonitors) {
+ // Simple recursive lock?
+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+ __ beqz(t0, done);
+ }
+
+
+    // Must save x10 if it is live now because cmpxchg must use it
+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+ save_native_result(masm, ret_type, stack_slots);
+ }
+
+ if (!UseHeavyMonitors) {
+ // get address of the stack lock
+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+ // get old displaced header
+ __ ld(old_hdr, Address(x10, 0));
+
+ // Atomic swap old header if oop still contains the stack lock
+ Label succeed;
+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock);
+ __ bind(succeed);
+ } else {
+ __ j(slow_path_unlock);
+ }
+
+ // slow path re-enters here
+ __ bind(unlock_done);
+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+ restore_native_result(masm, ret_type, stack_slots);
+ }
+
+ __ bind(done);
+ }
+
+ Label dtrace_method_exit, dtrace_method_exit_done;
+ {
+ int32_t offset = 0;
+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
+ __ lbu(t0, Address(t0, offset));
+ __ bnez(t0, dtrace_method_exit);
+ __ bind(dtrace_method_exit_done);
+ }
+
+ __ reset_last_Java_frame(false);
+
+ // Unbox oop result, e.g. JNIHandles::resolve result.
+ if (is_reference_type(ret_type)) {
+ __ resolve_jobject(x10, xthread, t1);
+ }
+
+ if (CheckJNICalls) {
+ // clear_pending_jni_exception_check
+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset()));
+ }
+
+ // reset handle block
+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset()));
+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes()));
+
+ __ leave();
+
+ // Any exception pending?
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+ __ bnez(t0, exception_pending);
+
+ // We're done
+ __ ret();
+
+ // Unexpected paths are out of line and go here
+
+ // forward the exception
+ __ bind(exception_pending);
+
+ // and forward the exception
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+ // Slow path locking & unlocking
+ if (method->is_synchronized()) {
+
+ __ block_comment("Slow path lock {");
+ __ bind(slow_path_lock);
+
+    // We have last_Java_frame set up. No exceptions, so do a vanilla call, not call_VM
+ // args are (oop obj, BasicLock* lock, JavaThread* thread)
+
+ // protect the args we've loaded
+ save_args(masm, total_c_args, c_arg, out_regs);
+
+ __ mv(c_rarg0, obj_reg);
+ __ mv(c_rarg1, lock_reg);
+ __ mv(c_rarg2, xthread);
+
+ // Not a leaf but we have last_Java_frame setup as we want
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
+ restore_args(masm, total_c_args, c_arg, out_regs);
+
+#ifdef ASSERT
+ { Label L;
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+ __ beqz(t0, L);
+ __ stop("no pending exception allowed on exit from monitorenter");
+ __ bind(L);
+ }
+#endif
+ __ j(lock_done);
+
+ __ block_comment("} Slow path lock");
+
+ __ block_comment("Slow path unlock {");
+ __ bind(slow_path_unlock);
+
+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
+ save_native_result(masm, ret_type, stack_slots);
+ }
+
+ __ mv(c_rarg2, xthread);
+ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+ __ mv(c_rarg0, obj_reg);
+
+ // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
+ // NOTE that obj_reg == x9 currently
+ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+
+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
+
+#ifdef ASSERT
+ {
+ Label L;
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+ __ beqz(t0, L);
+ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
+ __ bind(L);
+ }
+#endif /* ASSERT */
+
+ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+
+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
+ restore_native_result(masm, ret_type, stack_slots);
+ }
+ __ j(unlock_done);
+
+ __ block_comment("} Slow path unlock");
+
+ } // synchronized
+
+ // SLOW PATH Reguard the stack if needed
+
+ __ bind(reguard);
+ save_native_result(masm, ret_type, stack_slots);
+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
+ restore_native_result(masm, ret_type, stack_slots);
+ // and continue
+ __ j(reguard_done);
+
+ // SLOW PATH safepoint
+ {
+ __ block_comment("safepoint {");
+ __ bind(safepoint_in_progress);
+
+ // Don't use call_VM as it will see a possible pending exception and forward it
+ // and never return here preventing us from clearing _last_native_pc down below.
+ //
+ save_native_result(masm, ret_type, stack_slots);
+ __ mv(c_rarg0, xthread);
+#ifndef PRODUCT
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+ int32_t offset = 0;
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset);
+ __ jalr(x1, t0, offset);
+
+ // Restore any method result value
+ restore_native_result(masm, ret_type, stack_slots);
+
+ __ j(safepoint_in_progress_done);
+ __ block_comment("} safepoint");
+ }
+
+ // SLOW PATH dtrace support
+ {
+ __ block_comment("dtrace entry {");
+ __ bind(dtrace_method_entry);
+
+    // We have all of the arguments set up at this point. We must not clobber any
+    // argument registers here, so they are saved and restored around the VM call below.
+
+ save_args(masm, total_c_args, c_arg, out_regs);
+ __ mov_metadata(c_rarg1, method());
+ __ call_VM_leaf(
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+ xthread, c_rarg1);
+ restore_args(masm, total_c_args, c_arg, out_regs);
+ __ j(dtrace_method_entry_done);
+ __ block_comment("} dtrace entry");
+ }
+
+ {
+ __ block_comment("dtrace exit {");
+ __ bind(dtrace_method_exit);
+ save_native_result(masm, ret_type, stack_slots);
+ __ mov_metadata(c_rarg1, method());
+ __ call_VM_leaf(
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+ xthread, c_rarg1);
+ restore_native_result(masm, ret_type, stack_slots);
+ __ j(dtrace_method_exit_done);
+ __ block_comment("} dtrace exit");
+ }
+
+ __ flush();
+
+ nmethod *nm = nmethod::new_native_nmethod(method,
+ compile_id,
+ masm->code(),
+ vep_offset,
+ frame_complete,
+ stack_slots / VMRegImpl::slots_per_word,
+ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
+ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
+ oop_maps);
+ assert(nm != NULL, "create native nmethod fail!");
+ return nm;
+}
+
+// This function returns the adjusted size (in number of words) of a c2i adapter
+// activation, for use during deoptimization.
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+ assert(callee_locals >= callee_parameters,
+ "test and remove; got more parms than locals");
+ if (callee_locals < callee_parameters) {
+ return 0; // No adjustment for negative locals
+ }
+ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
+ // diff is counted in stack words
+ return align_up(diff, 2);
+}
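+// Example: with callee_parameters == 2, callee_locals == 5 and
+// Interpreter::stackElementWords == 1 (64-bit), diff == 3 and the
+// returned adjustment is align_up(3, 2) == 4 words.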
+
+//------------------------------generate_deopt_blob----------------------------
+void SharedRuntime::generate_deopt_blob() {
+ // Allocate space for the code
+ ResourceMark rm;
+ // Setup code generation tools
+ int pad = 0;
+ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ int frame_size_in_words = -1;
+ OopMap* map = NULL;
+ OopMapSet *oop_maps = new OopMapSet();
+ assert_cond(masm != NULL && oop_maps != NULL);
+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0);
+
+ // -------------
+ // This code enters when returning to a de-optimized nmethod. A return
+// address has been pushed on the stack, and return values are in
+ // registers.
+ // If we are doing a normal deopt then we were called from the patched
+ // nmethod from the point we returned to the nmethod. So the return
+ // address on the stack is wrong by NativeCall::instruction_size
+ // We will adjust the value so it looks like we have the original return
+ // address on the stack (like when we eagerly deoptimized).
+ // In the case of an exception pending when deoptimizing, we enter
+ // with a return address on the stack that points after the call we patched
+ // into the exception handler. We have the following register state from,
+ // e.g., the forward exception stub (see stubGenerator_riscv.cpp).
+ // x10: exception oop
+ // x9: exception handler
+ // x13: throwing pc
+ // So in this case we simply jam x13 into the useless return address and
+ // the stack looks just like we want.
+ //
+ // At this point we need to de-opt. We save the argument return
+ // registers. We call the first C routine, fetch_unroll_info(). This
+ // routine captures the return values and returns a structure which
+ // describes the current frame size and the sizes of all replacement frames.
+ // The current frame is compiled code and may contain many inlined
+ // functions, each with their own JVM state. We pop the current frame, then
+ // push all the new frames. Then we call the C routine unpack_frames() to
+ // populate these frames. Finally unpack_frames() returns us the new target
+ // address. Notice that callee-save registers are BLOWN here; they have
+ // already been captured in the vframeArray at the time the return PC was
+ // patched.
+ address start = __ pc();
+ Label cont;
+
+ // Prolog for non exception case!
+
+ // Save everything in sight.
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+ // Normal deoptimization. Save exec mode for unpack_frames.
+ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved
+ __ j(cont);
+
+ int reexecute_offset = __ pc() - start;
+
+ // Reexecute case
+  // the return address is the pc that describes what bci to re-execute at
+
+ // No need to update map as each call to save_live_registers will produce identical oopmap
+ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved
+ __ j(cont);
+
+ int exception_offset = __ pc() - start;
+
+ // Prolog for exception case
+
+ // all registers are dead at this entry point, except for x10, and
+ // x13 which contain the exception oop and exception pc
+ // respectively. Set them in TLS and fall thru to the
+ // unpack_with_exception_in_tls entry point.
+
+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
+
+ int exception_in_tls_offset = __ pc() - start;
+
+ // new implementation because exception oop is now passed in JavaThread
+
+ // Prolog for exception case
+ // All registers must be preserved because they might be used by LinearScan
+  // Exception oop and throwing PC are passed in JavaThread
+ // tos: stack at point of call to method that threw the exception (i.e. only
+ // args are on the stack, no return address)
+
+ // The return address pushed by save_live_registers will be patched
+ // later with the throwing pc. The correct value is not available
+ // now because loading it from memory would destroy registers.
+
+ // NB: The SP at this point must be the SP of the method that is
+ // being deoptimized. Deoptimization assumes that the frame created
+ // here by save_live_registers is immediately below the method's SP.
+ // This is a somewhat fragile mechanism.
+
+ // Save everything in sight.
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+ // Now it is safe to overwrite any register
+
+ // Deopt during an exception. Save exec mode for unpack_frames.
+ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved
+
+ // load throwing pc from JavaThread and patch it as the return address
+ // of the current frame. Then clear the field in JavaThread
+
+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
+ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize));
+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+#ifdef ASSERT
+ // verify that there is really an exception oop in JavaThread
+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+ __ verify_oop(x10);
+
+ // verify that there is no pending exception
+ Label no_pending_exception;
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+ __ beqz(t0, no_pending_exception);
+ __ stop("must not have pending exception here");
+ __ bind(no_pending_exception);
+#endif
+
+ __ bind(cont);
+
+ // Call C code. Need thread and this frame, but NOT official VM entry
+ // crud. We cannot block on this call, no GC can happen.
+ //
+ // UnrollBlock* fetch_unroll_info(JavaThread* thread)
+
+ // fetch_unroll_info needs to call last_java_frame().
+
+ Label retaddr;
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
+#ifdef ASSERT
+ {
+ Label L;
+ __ ld(t0, Address(xthread,
+ JavaThread::last_Java_fp_offset()));
+ __ beqz(t0, L);
+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
+ __ bind(L);
+ }
+#endif // ASSERT
+ __ mv(c_rarg0, xthread);
+ __ mv(c_rarg1, xcpool);
+ int32_t offset = 0;
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset);
+ __ jalr(x1, t0, offset);
+ __ bind(retaddr);
+
+ // Need to have an oopmap that tells fetch_unroll_info where to
+ // find any register it might need.
+ oop_maps->add_gc_map(__ pc() - start, map);
+
+ __ reset_last_Java_frame(false);
+
+ // Load UnrollBlock* into x15
+ __ mv(x15, x10);
+
+ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+ Label noException;
+ __ li(t0, Deoptimization::Unpack_exception);
+ __ bne(xcpool, t0, noException); // Was exception pending?
+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+ __ verify_oop(x10);
+
+ // Overwrite the result registers with the exception results.
+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+ __ bind(noException);
+
+ // Only register save data is on the stack.
+ // Now restore the result registers. Everything else is either dead
+ // or captured in the vframeArray.
+
+ // Restore fp result register
+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+ // Restore integer result register
+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+ // Pop all of the register save area off the stack
+ __ add(sp, sp, frame_size_in_words * wordSize);
+
+  // All of the register save area has been popped off the stack. Only the
+ // return address remains.
+
+ // Pop all the frames we must move/replace.
+ //
+ // Frame picture (youngest to oldest)
+ // 1: self-frame (no frame link)
+ // 2: deopting frame (no frame link)
+ // 3: caller of deopting frame (could be compiled/interpreted).
+ //
+ // Note: by leaving the return address of self-frame on the stack
+ // and using the size of frame 2 to adjust the stack
+ // when we are done the return to frame 3 will still be on the stack.
+
+ // Pop deoptimized frame
+ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
+ __ sub(x12, x12, 2 * wordSize);
+ __ add(sp, sp, x12);
+ __ ld(fp, Address(sp, 0));
+ __ ld(ra, Address(sp, wordSize));
+ __ addi(sp, sp, 2 * wordSize);
+ // RA should now be the return address to the caller (3)
+
+#ifdef ASSERT
+ // Compilers generate code that bang the stack by as much as the
+ // interpreter would need. So this stack banging should never
+ // trigger a fault. Verify that it does not on non product builds.
+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+ __ bang_stack_size(x9, x12);
+#endif
+ // Load address of array of frame pcs into x12
+ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+ // Load address of array of frame sizes into x14
+ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+ // Load counter into x13
+ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+
+ // Now adjust the caller's stack to make up for the extra locals
+ // but record the original sp so that we can save it in the skeletal interpreter
+ // frame and the stack walking of interpreter_sender will get the unextended sp
+ // value and not the "real" sp value.
+
+ const Register sender_sp = x16;
+
+ __ mv(sender_sp, sp);
+ __ lwu(x9, Address(x15,
+ Deoptimization::UnrollBlock::
+ caller_adjustment_offset_in_bytes()));
+ __ sub(sp, sp, x9);
+
+ // Push interpreter frames in a loop
+ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern
+ __ mv(t1, t0);
+ Label loop;
+ __ bind(loop);
+ __ ld(x9, Address(x14, 0)); // Load frame size
+ __ addi(x14, x14, wordSize);
+ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand
+ __ ld(ra, Address(x12, 0)); // Load pc
+ __ addi(x12, x12, wordSize);
+ __ enter(); // Save old & set new fp
+ __ sub(sp, sp, x9); // Prolog
+ // This value is corrected by layout_activation_impl
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+ __ mv(sender_sp, sp); // Pass sender_sp to next frame
+ __ addi(x13, x13, -1); // Decrement counter
+ __ bnez(x13, loop);
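+  // Net effect of the loop above (descriptive, not generated code): for each of
+  // the x13 skeletal frames, enter() pushes the saved ra/fp pair, sp is dropped
+  // by frame_sizes[i] - 2 words, last_sp is cleared and sender_sp recorded so the
+  // stack walker can treat the frame as a (not yet filled in) interpreter frame;
+  // unpack_frames() populates the locals and expression stacks later.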
+
+ // Re-push self-frame
+ __ ld(ra, Address(x12));
+ __ enter();
+
+ // Allocate a full sized register save area. We subtract 2 because
+ // enter() just pushed 2 words
+ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
+
+ // Restore frame locals after moving the frame
+ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+ // Call C code. Need thread but NOT official VM entry
+ // crud. We cannot block on this call, no GC can happen. Call should
+ // restore return values to their stack-slots with the new SP.
+ //
+ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
+
+ // Use fp because the frames look interpreted now
+ // Don't need the precise return PC here, just precise enough to point into this code blob.
+ address the_pc = __ pc();
+ __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+ __ mv(c_rarg0, xthread);
+ __ mv(c_rarg1, xcpool); // second arg: exec_mode
+ offset = 0;
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
+ __ jalr(x1, t0, offset);
+
+ // Set an oopmap for the call site
+ // Use the same PC we used for the last java frame
+ oop_maps->add_gc_map(the_pc - start,
+ new OopMap(frame_size_in_words, 0));
+
+ // Clear fp AND pc
+ __ reset_last_Java_frame(true);
+
+ // Collect return values
+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+ // Pop self-frame.
+ __ leave(); // Epilog
+
+ // Jump to interpreter
+ __ ret();
+
+ // Make sure all code is generated
+ masm->flush();
+
+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
+ assert(_deopt_blob != NULL, "create deoptimization blob fail!");
+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+}
+
+// Number of stack slots between incoming argument block and the start of
+// a new frame. The PROLOG must add this many slots to the stack. The
+// EPILOG must remove this many slots.
+// RISCV needs two words for RA (return address) and FP (frame pointer).
+uint SharedRuntime::in_preserve_stack_slots() {
+ return 2 * VMRegImpl::slots_per_word;
+}
+
+uint SharedRuntime::out_preserve_stack_slots() {
+ return 0;
+}
+
+#ifdef COMPILER2
+//------------------------------generate_uncommon_trap_blob--------------------
+void SharedRuntime::generate_uncommon_trap_blob() {
+ // Allocate space for the code
+ ResourceMark rm;
+ // Setup code generation tools
+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ assert_cond(masm != NULL);
+
+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
+
+ address start = __ pc();
+
+ // Push self-frame. We get here with a return address in RA
+ // and sp should be 16 byte aligned
+ // push fp and retaddr by hand
+ __ addi(sp, sp, -2 * wordSize);
+ __ sd(ra, Address(sp, wordSize));
+ __ sd(fp, Address(sp, 0));
+ // we don't expect an arg reg save area
+#ifndef PRODUCT
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+  // compiler left unloaded_class_index in j_rarg0; move it to where the
+ // runtime expects it.
+ __ addiw(c_rarg1, j_rarg0, 0);
+
+ // we need to set the past SP to the stack pointer of the stub frame
+ // and the pc to the address where this runtime call will return
+  // (although actually any pc in this code blob will do).
+ Label retaddr;
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+ // Call C code. Need thread but NOT official VM entry
+ // crud. We cannot block on this call, no GC can happen. Call should
+ // capture callee-saved registers as well as return values.
+ //
+ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode)
+ //
+ // n.b. 3 gp args, 0 fp args, integral return type
+
+ __ mv(c_rarg0, xthread);
+ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
+ int32_t offset = 0;
+ __ la_patchable(t0,
+ RuntimeAddress(CAST_FROM_FN_PTR(address,
+ Deoptimization::uncommon_trap)), offset);
+ __ jalr(x1, t0, offset);
+ __ bind(retaddr);
+
+ // Set an oopmap for the call site
+ OopMapSet* oop_maps = new OopMapSet();
+ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
+ assert_cond(oop_maps != NULL && map != NULL);
+
+ // location of fp is known implicitly by the frame sender code
+
+ oop_maps->add_gc_map(__ pc() - start, map);
+
+ __ reset_last_Java_frame(false);
+
+ // move UnrollBlock* into x14
+ __ mv(x14, x10);
+
+#ifdef ASSERT
+ { Label L;
+ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+ __ mvw(t1, Deoptimization::Unpack_uncommon_trap);
+ __ beq(t0, t1, L);
+    __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
+ __ bind(L);
+ }
+#endif
+
+ // Pop all the frames we must move/replace.
+ //
+ // Frame picture (youngest to oldest)
+ // 1: self-frame (no frame link)
+ // 2: deopting frame (no frame link)
+ // 3: caller of deopting frame (could be compiled/interpreted).
+
+ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
+
+ // Pop deoptimized frame (int)
+ __ lwu(x12, Address(x14,
+ Deoptimization::UnrollBlock::
+ size_of_deoptimized_frame_offset_in_bytes()));
+ __ sub(x12, x12, 2 * wordSize);
+ __ add(sp, sp, x12);
+ __ ld(fp, sp, 0);
+ __ ld(ra, sp, wordSize);
+ __ addi(sp, sp, 2 * wordSize);
+ // RA should now be the return address to the caller (3) frame
+
+#ifdef ASSERT
+ // Compilers generate code that bang the stack by as much as the
+ // interpreter would need. So this stack banging should never
+ // trigger a fault. Verify that it does not on non product builds.
+ __ lwu(x11, Address(x14,
+ Deoptimization::UnrollBlock::
+ total_frame_sizes_offset_in_bytes()));
+ __ bang_stack_size(x11, x12);
+#endif
+
+ // Load address of array of frame pcs into x12 (address*)
+ __ ld(x12, Address(x14,
+ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+ // Load address of array of frame sizes into x15 (intptr_t*)
+ __ ld(x15, Address(x14,
+ Deoptimization::UnrollBlock::
+ frame_sizes_offset_in_bytes()));
+
+ // Counter
+ __ lwu(x13, Address(x14,
+ Deoptimization::UnrollBlock::
+ number_of_frames_offset_in_bytes())); // (int)
+
+ // Now adjust the caller's stack to make up for the extra locals but
+ // record the original sp so that we can save it in the skeletal
+ // interpreter frame and the stack walking of interpreter_sender
+ // will get the unextended sp value and not the "real" sp value.
+
+ const Register sender_sp = t1; // temporary register
+
+ __ lwu(x11, Address(x14,
+ Deoptimization::UnrollBlock::
+ caller_adjustment_offset_in_bytes())); // (int)
+ __ mv(sender_sp, sp);
+ __ sub(sp, sp, x11);
+
+ // Push interpreter frames in a loop
+ Label loop;
+ __ bind(loop);
+ __ ld(x11, Address(x15, 0)); // Load frame size
+ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand
+ __ ld(ra, Address(x12, 0)); // Save return address
+ __ enter(); // and old fp & set new fp
+ __ sub(sp, sp, x11); // Prolog
+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+ // This value is corrected by layout_activation_impl
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ __ mv(sender_sp, sp); // Pass sender_sp to next frame
+ __ add(x15, x15, wordSize); // Bump array pointer (sizes)
+ __ add(x12, x12, wordSize); // Bump array pointer (pcs)
+ __ subw(x13, x13, 1); // Decrement counter
+ __ bgtz(x13, loop);
+ __ ld(ra, Address(x12, 0)); // save final return address
+ // Re-push self-frame
+ __ enter(); // & old fp & set new fp
+
+ // Use fp because the frames look interpreted now
+ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
+ // Don't need the precise return PC here, just precise enough to point into this code blob.
+ address the_pc = __ pc();
+ __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+ // Call C code. Need thread but NOT official VM entry
+ // crud. We cannot block on this call, no GC can happen. Call should
+ // restore return values to their stack-slots with the new SP.
+ //
+ // BasicType unpack_frames(JavaThread* thread, int exec_mode)
+ //
+
+ // n.b. 2 gp args, 0 fp args, integral return type
+
+ // sp should already be aligned
+ __ mv(c_rarg0, xthread);
+ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
+ offset = 0;
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
+ __ jalr(x1, t0, offset);
+
+ // Set an oopmap for the call site
+ // Use the same PC we used for the last java frame
+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+ // Clear fp AND pc
+ __ reset_last_Java_frame(true);
+
+ // Pop self-frame.
+ __ leave(); // Epilog
+
+ // Jump to interpreter
+ __ ret();
+
+ // Make sure all code is generated
+ masm->flush();
+
+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
+ SimpleRuntimeFrame::framesize >> 1);
+}
+#endif // COMPILER2
+
+//------------------------------generate_handler_blob------
+//
+// Generate a special Compile2Runtime blob that saves all registers,
+// and setup oopmap.
+//
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
+ ResourceMark rm;
+ OopMapSet *oop_maps = new OopMapSet();
+ assert_cond(oop_maps != NULL);
+ OopMap* map = NULL;
+
+ // Allocate space for the code. Setup code generation tools.
+ CodeBuffer buffer("handler_blob", 2048, 1024);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ assert_cond(masm != NULL);
+
+ address start = __ pc();
+ address call_pc = NULL;
+ int frame_size_in_words = -1;
+ bool cause_return = (poll_type == POLL_AT_RETURN);
+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */);
+
+ // Save Integer and Float registers.
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+ // The following is basically a call_VM. However, we need the precise
+ // address of the call in order to generate an oopmap. Hence, we do all the
+  // work ourselves.
+
+ Label retaddr;
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+ // The return address must always be correct so that frame constructor never
+ // sees an invalid pc.
+
+ if (!cause_return) {
+ // overwrite the return address pushed by save_live_registers
+ // Additionally, x18 is a callee-saved register so we can look at
+ // it later to determine if someone changed the return address for
+ // us!
+ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset()));
+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
+ }
+
+ // Do the call
+ __ mv(c_rarg0, xthread);
+ int32_t offset = 0;
+ __ la_patchable(t0, RuntimeAddress(call_ptr), offset);
+ __ jalr(x1, t0, offset);
+ __ bind(retaddr);
+
+ // Set an oopmap for the call site. This oopmap will map all
+ // oop-registers and debug-info registers as callee-saved. This
+ // will allow deoptimization at this safepoint to find all possible
+ // debug-info recordings, as well as let GC find all oops.
+
+ oop_maps->add_gc_map( __ pc() - start, map);
+
+ Label noException;
+
+ __ reset_last_Java_frame(false);
+
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+ __ beqz(t0, noException);
+
+ // Exception pending
+
+ reg_saver.restore_live_registers(masm);
+
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+ // No exception case
+ __ bind(noException);
+
+ Label no_adjust, bail;
+ if (!cause_return) {
+ // If our stashed return pc was modified by the runtime we avoid touching it
+ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize));
+ __ bne(x18, t0, no_adjust);
+
+#ifdef ASSERT
+ // Verify the correct encoding of the poll we're about to skip.
+ // See NativeInstruction::is_lwu_to_zr()
+ __ lwu(t0, Address(x18));
+ __ andi(t1, t0, 0b0000011);
+ __ mv(t2, 0b0000011);
+ __ bne(t1, t2, bail); // 0-6:0b0000011
+ __ srli(t1, t0, 7);
+ __ andi(t1, t1, 0b00000);
+ __ bnez(t1, bail); // 7-11:0b00000
+ __ srli(t1, t0, 12);
+ __ andi(t1, t1, 0b110);
+ __ mv(t2, 0b110);
+ __ bne(t1, t2, bail); // 12-14:0b110
+#endif
+ // Adjust return pc forward to step over the safepoint poll instruction
+ __ add(x18, x18, NativeInstruction::instruction_size);
+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
+ }
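+  // The bit pattern checked in the ASSERT block above matches `lwu zr, imm(xN)`:
+  // opcode 0b0000011 (LOAD), rd == x0, funct3 == 0b110 (LWU), which is the form
+  // of the safepoint poll instruction, so we only step over a real poll.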
+
+ __ bind(no_adjust);
+ // Normal exit, restore registers and exit.
+
+ reg_saver.restore_live_registers(masm);
+ __ ret();
+
+#ifdef ASSERT
+ __ bind(bail);
+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
+#endif
+
+ // Make sure all code is generated
+ masm->flush();
+
+ // Fill-out other meta info
+ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
+}
+
+//
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
+//
+// Generate a stub that calls into vm to find out the proper destination
+// of a java call. All the argument registers are live at this point
+// but since this is generic code we don't know what they are and the caller
+// must do any gc of the args.
+//
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+ // allocate space for the code
+ ResourceMark rm;
+
+ CodeBuffer buffer(name, 1000, 512);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ assert_cond(masm != NULL);
+
+ int frame_size_in_words = -1;
+ RegisterSaver reg_saver(false /* save_vectors */);
+
+ OopMapSet *oop_maps = new OopMapSet();
+ assert_cond(oop_maps != NULL);
+ OopMap* map = NULL;
+
+ int start = __ offset();
+
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+ int frame_complete = __ offset();
+
+ {
+ Label retaddr;
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+ __ mv(c_rarg0, xthread);
+ int32_t offset = 0;
+ __ la_patchable(t0, RuntimeAddress(destination), offset);
+ __ jalr(x1, t0, offset);
+ __ bind(retaddr);
+ }
+
+ // Set an oopmap for the call site.
+ // We need this not only for callee-saved registers, but also for volatile
+ // registers that the compiler might be keeping live across a safepoint.
+
+ oop_maps->add_gc_map( __ offset() - start, map);
+
+ // x10 contains the address we are going to jump to assuming no exception got installed
+
+ // clear last_Java_sp
+ __ reset_last_Java_frame(false);
+ // check for pending exceptions
+ Label pending;
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+ __ bnez(t0, pending);
+
+ // get the returned Method*
+ __ get_vm_result_2(xmethod, xthread);
+ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod)));
+
+ // x10 is where we want to jump, overwrite t0 which is saved and temporary
+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0)));
+ reg_saver.restore_live_registers(masm);
+
+  // We are back to the original state on entry and ready to go.
+
+ __ jr(t0);
+
+ // Pending exception after the safepoint
+
+ __ bind(pending);
+
+ reg_saver.restore_live_registers(masm);
+
+ // exception pending => remove activation and forward to exception handler
+
+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
+
+ __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+ // -------------
+ // make sure all code is generated
+ masm->flush();
+
+ // return the blob
+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
+}
+
+#ifdef COMPILER2
+RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
+                                                int shadow_space_bytes,
+                                                const GrowableArray<VMReg>& input_registers,
+                                                const GrowableArray<VMReg>& output_registers) {
+ Unimplemented();
+ return nullptr;
+}
+
+//------------------------------generate_exception_blob---------------------------
+// Creates the exception blob at the end.
+// The exception blob is jumped to from a compiled method.
+// (see emit_exception_handler in riscv.ad file)
+//
+// Given an exception pc at a call we call into the runtime for the
+// handler in this method. This handler might merely restore state
+// (i.e. callee save registers), unwind the frame, and jump to the
+// exception handler for the nmethod if there is no Java level handler
+// for the nmethod.
+//
+// This code is entered with a jmp.
+//
+// Arguments:
+// x10: exception oop
+// x13: exception pc
+//
+// Results:
+// x10: exception oop
+// x13: exception pc in caller
+// destination: exception handler of caller
+//
+// Note: the exception pc MUST be at a call (precise debug information)
+// Registers x10, x13, x12, x14, x15, t0 are not callee saved.
+//
+
+void OptoRuntime::generate_exception_blob() {
+ assert(!OptoRuntime::is_callee_saved_register(R13_num), "");
+ assert(!OptoRuntime::is_callee_saved_register(R10_num), "");
+ assert(!OptoRuntime::is_callee_saved_register(R12_num), "");
+
+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
+
+ // Allocate space for the code
+ ResourceMark rm;
+ // Setup code generation tools
+ CodeBuffer buffer("exception_blob", 2048, 1024);
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+ assert_cond(masm != NULL);
+
+ // TODO check various assumptions made here
+ //
+ // make sure we do so before running this
+
+ address start = __ pc();
+
+ // push fp and retaddr by hand
+ // Exception pc is 'return address' for stack walker
+ __ addi(sp, sp, -2 * wordSize);
+ __ sd(ra, Address(sp, wordSize));
+ __ sd(fp, Address(sp));
+ // there are no callee save registers and we don't expect an
+ // arg reg save area
+#ifndef PRODUCT
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+ // Store exception in Thread object. We cannot pass any arguments to the
+ // handle_exception call, since we do not want to make any assumption
+ // about the size of the frame where the exception happened in.
+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
+
+ // This call does all the hard work. It checks if an exception handler
+ // exists in the method.
+ // If so, it returns the handler address.
+ // If not, it prepares for stack-unwinding, restoring the callee-save
+ // registers of the frame being removed.
+ //
+ // address OptoRuntime::handle_exception_C(JavaThread* thread)
+ //
+ // n.b. 1 gp arg, 0 fp args, integral return type
+
+ // the stack should always be aligned
+ address the_pc = __ pc();
+ __ set_last_Java_frame(sp, noreg, the_pc, t0);
+ __ mv(c_rarg0, xthread);
+ int32_t offset = 0;
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset);
+ __ jalr(x1, t0, offset);
+
+
+ // handle_exception_C is a special VM call which does not require an explicit
+ // instruction sync afterwards.
+
+ // Set an oopmap for the call site. This oopmap will only be used if we
+ // are unwinding the stack. Hence, all locations will be dead.
+ // Callee-saved registers will be the same as the frame above (i.e.,
+ // handle_exception_stub), since they were restored when we got the
+ // exception.
+
+ OopMapSet* oop_maps = new OopMapSet();
+ assert_cond(oop_maps != NULL);
+
+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+ __ reset_last_Java_frame(false);
+
+ // Restore callee-saved registers
+
+ // fp is an implicitly saved callee saved register (i.e. the calling
+ // convention will save restore it in prolog/epilog) Other than that
+ // there are no callee save registers now that adapter frames are gone.
+  // and we don't expect an arg reg save area
+ __ ld(fp, Address(sp));
+ __ ld(x13, Address(sp, wordSize));
+ __ addi(sp, sp , 2 * wordSize);
+
+ // x10: exception handler
+
+ // We have a handler in x10 (could be deopt blob).
+ __ mv(t0, x10);
+
+ // Get the exception oop
+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+ // Get the exception pc in case we are deoptimized
+ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset()));
+#ifdef ASSERT
+ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset()));
+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+#endif
+ // Clear the exception oop so GC no longer processes it as a root.
+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
+
+ // x10: exception oop
+ // t0: exception handler
+ // x14: exception pc
+ // Jump to handler
+
+ __ jr(t0);
+
+ // Make sure all code is generated
+ masm->flush();
+
+ // Set exception blob
+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
+}
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b3fdd04db1b3998acaaed1440ebc84e42fb8e7b1
--- /dev/null
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -0,0 +1,3864 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "compiler/oopMap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/universe.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/instanceOop.hpp"
+#include "oops/method.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "utilities/align.hpp"
+#include "utilities/powerOfTwo.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+#if INCLUDE_ZGC
+#include "gc/z/zThreadLocalData.hpp"
+#endif
+
+// Declaration and definition of StubGenerator (no .hpp file).
+// For a more detailed description of the stub routine structure
+// see the comment in stubRoutines.hpp
+
+#undef __
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// Stub Code definitions
+
+class StubGenerator: public StubCodeGenerator {
+ private:
+
+#ifdef PRODUCT
+#define inc_counter_np(counter) ((void)0)
+#else
+ void inc_counter_np_(int& counter) {
+ __ la(t1, ExternalAddress((address)&counter));
+ __ lwu(t0, Address(t1, 0));
+ __ addiw(t0, t0, 1);
+ __ sw(t0, Address(t1, 0));
+ }
+#define inc_counter_np(counter) \
+ BLOCK_COMMENT("inc_counter " #counter); \
+ inc_counter_np_(counter);
+#endif
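+  // Typical (illustrative) use inside a generated stub, assuming one of the
+  // existing non-product SharedRuntime counters:
+  //   inc_counter_np(SharedRuntime::_jint_array_copy_ctr);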
+
+ // Call stubs are used to call Java from C
+ //
+ // Arguments:
+ // c_rarg0: call wrapper address address
+ // c_rarg1: result address
+ // c_rarg2: result type BasicType
+ // c_rarg3: method Method*
+ // c_rarg4: (interpreter) entry point address
+ // c_rarg5: parameters intptr_t*
+ // c_rarg6: parameter size (in words) int
+ // c_rarg7: thread Thread*
+ //
+ // There is no return from the stub itself as any Java result
+ // is written to result
+ //
+ // we save x1 (ra) as the return PC at the base of the frame and
+  // link x8 (fp) below it as the frame pointer, then install sp (x2)
+  // into fp.
+ //
+ // we save x10-x17, which accounts for all the c arguments.
+ //
+ // TODO: strictly do we need to save them all? they are treated as
+ // volatile by C so could we omit saving the ones we are going to
+ // place in global registers (thread? method?) or those we only use
+ // during setup of the Java call?
+ //
+ // we don't need to save x5 which C uses as an indirect result location
+ // return register.
+ //
+ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as
+ // volatile
+ //
+ // we save x18-x27 which Java uses as temporary registers and C
+ // expects to be callee-save
+ //
+ // so the stub frame looks like this when we enter Java code
+ //
+ // [ return_from_Java ] <--- sp
+ // [ argument word n ]
+ // ...
+ // -22 [ argument word 1 ]
+ // -21 [ saved x27 ] <--- sp_after_call
+ // -20 [ saved x26 ]
+ // -19 [ saved x25 ]
+ // -18 [ saved x24 ]
+ // -17 [ saved x23 ]
+ // -16 [ saved x22 ]
+ // -15 [ saved x21 ]
+ // -14 [ saved x20 ]
+ // -13 [ saved x19 ]
+ // -12 [ saved x18 ]
+ // -11 [ saved x9 ]
+ // -10 [ call wrapper (x10) ]
+ // -9 [ result (x11) ]
+ // -8 [ result type (x12) ]
+ // -7 [ method (x13) ]
+ // -6 [ entry point (x14) ]
+ // -5 [ parameters (x15) ]
+ // -4 [ parameter size (x16) ]
+ // -3 [ thread (x17) ]
+ // -2 [ saved fp (x8) ]
+ // -1 [ saved ra (x1) ]
+ // 0 [ ] <--- fp == saved sp (x2)
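+ //
+ // Conceptually the stub behaves like a C function of roughly this shape
+ // (a sketch for orientation only; the names are illustrative, not a real
+ // typedef in this file):
+ //
+ //   void call_stub(address call_wrapper, address result, BasicType result_type,
+ //                  Method* method, address entry_point, intptr_t* parameters,
+ //                  int parameter_size_in_words, Thread* thread);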
+
+ // Call stub stack layout word offsets from fp
+ enum call_stub_layout {
+ sp_after_call_off = -21,
+
+ x27_off = -21,
+ x26_off = -20,
+ x25_off = -19,
+ x24_off = -18,
+ x23_off = -17,
+ x22_off = -16,
+ x21_off = -15,
+ x20_off = -14,
+ x19_off = -13,
+ x18_off = -12,
+ x9_off = -11,
+
+ call_wrapper_off = -10,
+ result_off = -9,
+ result_type_off = -8,
+ method_off = -7,
+ entry_point_off = -6,
+ parameters_off = -5,
+ parameter_size_off = -4,
+ thread_off = -3,
+ fp_f = -2,
+ retaddr_off = -1,
+ };
+
+ address generate_call_stub(address& return_address) {
+ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
+ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
+ "adjust this code");
+
+ StubCodeMark mark(this, "StubRoutines", "call_stub");
+ address start = __ pc();
+
+ const Address sp_after_call (fp, sp_after_call_off * wordSize);
+
+ const Address call_wrapper (fp, call_wrapper_off * wordSize);
+ const Address result (fp, result_off * wordSize);
+ const Address result_type (fp, result_type_off * wordSize);
+ const Address method (fp, method_off * wordSize);
+ const Address entry_point (fp, entry_point_off * wordSize);
+ const Address parameters (fp, parameters_off * wordSize);
+ const Address parameter_size(fp, parameter_size_off * wordSize);
+
+ const Address thread (fp, thread_off * wordSize);
+
+ const Address x27_save (fp, x27_off * wordSize);
+ const Address x26_save (fp, x26_off * wordSize);
+ const Address x25_save (fp, x25_off * wordSize);
+ const Address x24_save (fp, x24_off * wordSize);
+ const Address x23_save (fp, x23_off * wordSize);
+ const Address x22_save (fp, x22_off * wordSize);
+ const Address x21_save (fp, x21_off * wordSize);
+ const Address x20_save (fp, x20_off * wordSize);
+ const Address x19_save (fp, x19_off * wordSize);
+ const Address x18_save (fp, x18_off * wordSize);
+
+ const Address x9_save (fp, x9_off * wordSize);
+
+ // stub code
+
+ address riscv_entry = __ pc();
+
+ // set up frame and move sp to end of save area
+ __ enter();
+ __ addi(sp, fp, sp_after_call_off * wordSize);
+
+ // save register parameters and Java temporary/global registers
+ // n.b. we save thread even though it gets installed in
+ // xthread because we want to sanity check tp later
+ __ sd(c_rarg7, thread);
+ __ sw(c_rarg6, parameter_size);
+ __ sd(c_rarg5, parameters);
+ __ sd(c_rarg4, entry_point);
+ __ sd(c_rarg3, method);
+ __ sd(c_rarg2, result_type);
+ __ sd(c_rarg1, result);
+ __ sd(c_rarg0, call_wrapper);
+
+ __ sd(x9, x9_save);
+
+ __ sd(x18, x18_save);
+ __ sd(x19, x19_save);
+ __ sd(x20, x20_save);
+ __ sd(x21, x21_save);
+ __ sd(x22, x22_save);
+ __ sd(x23, x23_save);
+ __ sd(x24, x24_save);
+ __ sd(x25, x25_save);
+ __ sd(x26, x26_save);
+ __ sd(x27, x27_save);
+
+ // install Java thread in global register now we have saved
+ // whatever value it held
+ __ mv(xthread, c_rarg7);
+
+ // And method
+ __ mv(xmethod, c_rarg3);
+
+ // set up the heapbase register
+ __ reinit_heapbase();
+
+#ifdef ASSERT
+ // make sure we have no pending exceptions
+ {
+ Label L;
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+ __ beqz(t0, L);
+ __ stop("StubRoutines::call_stub: entered with pending exception");
+ __ BIND(L);
+ }
+#endif
+ // pass parameters if any
+ __ mv(esp, sp);
+ __ slli(t0, c_rarg6, LogBytesPerWord);
+ __ sub(t0, sp, t0); // Move SP out of the way
+ __ andi(sp, t0, -2 * wordSize);
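+ // note: -2 * wordSize == -16, so the andi above also keeps sp 16-byte
+ // aligned while reserving space for the outgoing Java parameters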
+
+ BLOCK_COMMENT("pass parameters if any");
+ Label parameters_done;
+ // parameter count is still in c_rarg6
+ // and parameter pointer identifying param 1 is in c_rarg5
+ __ beqz(c_rarg6, parameters_done);
+
+ address loop = __ pc();
+ __ ld(t0, c_rarg5, 0);
+ __ addi(c_rarg5, c_rarg5, wordSize);
+ __ addi(c_rarg6, c_rarg6, -1);
+ __ push_reg(t0);
+ __ bgtz(c_rarg6, loop);
+
+ __ BIND(parameters_done);
+
+ // call Java entry -- passing Method* and current sp
+ // xmethod: Method*
+ // x30: sender sp
+ BLOCK_COMMENT("call Java function");
+ __ mv(x30, sp);
+ __ jalr(c_rarg4);
+
+ // save current address for use by exception handling code
+
+ return_address = __ pc();
+
+ // store result depending on type (everything that is not
+ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
+ // n.b. this assumes Java returns an integral result in x10
+ // and a floating result in j_farg0
+ __ ld(j_rarg2, result);
+ Label is_long, is_float, is_double, exit;
+ __ ld(j_rarg1, result_type);
+ __ li(t0, (u1)T_OBJECT);
+ __ beq(j_rarg1, t0, is_long);
+ __ li(t0, (u1)T_LONG);
+ __ beq(j_rarg1, t0, is_long);
+ __ li(t0, (u1)T_FLOAT);
+ __ beq(j_rarg1, t0, is_float);
+ __ li(t0, (u1)T_DOUBLE);
+ __ beq(j_rarg1, t0, is_double);
+
+ // handle T_INT case
+ __ sw(x10, Address(j_rarg2));
+
+ __ BIND(exit);
+
+ // pop parameters
+ __ addi(esp, fp, sp_after_call_off * wordSize);
+
+#ifdef ASSERT
+ // verify that threads correspond
+ {
+ Label L, S;
+ __ ld(t0, thread);
+ __ bne(xthread, t0, S);
+ __ get_thread(t0);
+ __ beq(xthread, t0, L);
+ __ BIND(S);
+ __ stop("StubRoutines::call_stub: threads must correspond");
+ __ BIND(L);
+ }
+#endif
+
+ // restore callee-save registers
+ __ ld(x27, x27_save);
+ __ ld(x26, x26_save);
+ __ ld(x25, x25_save);
+ __ ld(x24, x24_save);
+ __ ld(x23, x23_save);
+ __ ld(x22, x22_save);
+ __ ld(x21, x21_save);
+ __ ld(x20, x20_save);
+ __ ld(x19, x19_save);
+ __ ld(x18, x18_save);
+
+ __ ld(x9, x9_save);
+
+ __ ld(c_rarg0, call_wrapper);
+ __ ld(c_rarg1, result);
+ __ ld(c_rarg2, result_type);
+ __ ld(c_rarg3, method);
+ __ ld(c_rarg4, entry_point);
+ __ ld(c_rarg5, parameters);
+ __ ld(c_rarg6, parameter_size);
+ __ ld(c_rarg7, thread);
+
+ // leave frame and return to caller
+ __ leave();
+ __ ret();
+
+ // handle return types different from T_INT
+
+ __ BIND(is_long);
+ __ sd(x10, Address(j_rarg2, 0));
+ __ j(exit);
+
+ __ BIND(is_float);
+ __ fsw(j_farg0, Address(j_rarg2, 0), t0);
+ __ j(exit);
+
+ __ BIND(is_double);
+ __ fsd(j_farg0, Address(j_rarg2, 0), t0);
+ __ j(exit);
+
+ return start;
+ }
+
+ // Return point for a Java call if there's an exception thrown in
+ // Java code. The exception is caught and transformed into a
+ // pending exception stored in JavaThread that can be tested from
+ // within the VM.
+ //
+ // Note: Usually the parameters are removed by the callee. In case
+ // of an exception crossing an activation frame boundary, that is
+ // not the case if the callee is compiled code => need to setup the
+ // sp.
+ //
+ // x10: exception oop
+
+ address generate_catch_exception() {
+ StubCodeMark mark(this, "StubRoutines", "catch_exception");
+ address start = __ pc();
+
+ // same as in generate_call_stub():
+ const Address thread(fp, thread_off * wordSize);
+
+#ifdef ASSERT
+ // verify that threads correspond
+ {
+ Label L, S;
+ __ ld(t0, thread);
+ __ bne(xthread, t0, S);
+ __ get_thread(t0);
+ __ beq(xthread, t0, L);
+ __ bind(S);
+ __ stop("StubRoutines::catch_exception: threads must correspond");
+ __ bind(L);
+ }
+#endif
+
+ // set pending exception
+ __ verify_oop(x10);
+
+ __ sd(x10, Address(xthread, Thread::pending_exception_offset()));
+ __ mv(t0, (address)__FILE__);
+ __ sd(t0, Address(xthread, Thread::exception_file_offset()));
+ __ mv(t0, (int)__LINE__);
+ __ sw(t0, Address(xthread, Thread::exception_line_offset()));
+
+ // complete return to VM
+ assert(StubRoutines::_call_stub_return_address != NULL,
+ "_call_stub_return_address must have been generated before");
+ __ j(StubRoutines::_call_stub_return_address);
+
+ return start;
+ }
+
+ // Continuation point for runtime calls returning with a pending
+ // exception. The pending exception check happened in the runtime
+ // or native call stub. The pending exception in Thread is
+ // converted into a Java-level exception.
+ //
+ // Contract with Java-level exception handlers:
+ // x10: exception
+ // x13: throwing pc
+ //
+ // NOTE: At entry of this stub, exception-pc must be in RA !!
+
+ // NOTE: this is always used as a jump target within generated code
+ // so it just needs to be generated code with no prolog
+
+ address generate_forward_exception() {
+ StubCodeMark mark(this, "StubRoutines", "forward exception");
+ address start = __ pc();
+
+ // Upon entry, RA points to the return address returning into
+ // Java (interpreted or compiled) code; i.e., the return address
+ // becomes the throwing pc.
+ //
+ // Arguments pushed before the runtime call are still on the stack
+ // but the exception handler will reset the stack pointer ->
+ // ignore them. A potential result in registers can be ignored as
+ // well.
+
+#ifdef ASSERT
+ // make sure this code is only executed if there is a pending exception
+ {
+ Label L;
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+ __ bnez(t0, L);
+ __ stop("StubRoutines::forward exception: no pending exception (1)");
+ __ bind(L);
+ }
+#endif
+
+ // compute exception handler into x9
+
+ // call the VM to find the handler address associated with the
+ // caller address. pass thread in x10 and caller pc (ret address)
+ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on
+ // the stack.
+ __ mv(c_rarg1, ra);
+ // ra will be trashed by the VM call so we move it to x9
+ // (callee-saved) because we also need to pass it to the handler
+ // returned by this call.
+ __ mv(x9, ra);
+ BLOCK_COMMENT("call exception_handler_for_return_address");
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address,
+ SharedRuntime::exception_handler_for_return_address),
+ xthread, c_rarg1);
+ // we should not really care that ra is no longer the callee
+ // address. we saved the value the handler needs in x9 so we can
+ // just copy it to x13. however, the C2 handler will push its own
+ // frame and then call into the VM, and the VM code asserts that
+ // the PC for the frame above the handler belongs to a compiled
+ // Java method. So, we restore ra here to satisfy that assert.
+ __ mv(ra, x9);
+ // setup x10 & x13 & clear pending exception
+ __ mv(x13, x9);
+ __ mv(x9, x10);
+ __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
+ __ sd(zr, Address(xthread, Thread::pending_exception_offset()));
+
+#ifdef ASSERT
+ // make sure exception is set
+ {
+ Label L;
+ __ bnez(x10, L);
+ __ stop("StubRoutines::forward exception: no pending exception (2)");
+ __ bind(L);
+ }
+#endif
+
+ // continue at exception handler
+ // x10: exception
+ // x13: throwing pc
+ // x9: exception handler
+ __ verify_oop(x10);
+ __ jr(x9);
+
+ return start;
+ }
+
+ // Non-destructive plausibility checks for oops
+ //
+ // Arguments:
+ // x10: oop to verify
+ // t0: error message
+ //
+ // Stack after saving c_rarg3:
+ // [tos + 0]: saved c_rarg3
+ // [tos + 1]: saved c_rarg2
+ // [tos + 2]: saved ra
+ // [tos + 3]: saved t1
+ // [tos + 4]: saved x10
+ // [tos + 5]: saved t0
+ address generate_verify_oop() {
+
+ StubCodeMark mark(this, "StubRoutines", "verify_oop");
+ address start = __ pc();
+
+ Label exit, error;
+
+ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3
+
+ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
+ __ ld(c_rarg3, Address(c_rarg2));
+ __ add(c_rarg3, c_rarg3, 1);
+ __ sd(c_rarg3, Address(c_rarg2));
+
+ // object is in x10
+ // make sure object is 'reasonable'
+ __ beqz(x10, exit); // if obj is NULL it is OK
+
+#if INCLUDE_ZGC
+ if (UseZGC) {
+ // Check if mask is good.
+ // verifies that ZAddressBadMask & x10 == 0
+ __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset()));
+ __ andr(c_rarg2, x10, c_rarg3);
+ __ bnez(c_rarg2, error);
+ }
+#endif
+
+ // Check if the oop is in the right area of memory
+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask());
+ __ andr(c_rarg2, x10, c_rarg3);
+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits());
+
+ // Compare c_rarg2 and c_rarg3.
+ __ bne(c_rarg2, c_rarg3, error);
+
+ // make sure klass is 'reasonable', which is not zero.
+ __ load_klass(x10, x10); // get klass
+ __ beqz(x10, error); // if klass is NULL it is broken
+
+ // return if everything seems ok
+ __ bind(exit);
+
+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3
+ __ ret();
+
+ // handle errors
+ __ bind(error);
+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3
+
+ __ pusha();
+ // debug(char* msg, int64_t pc, int64_t regs[])
+ __ mv(c_rarg0, t0); // pass address of error message
+ __ mv(c_rarg1, ra); // pass return address
+ __ mv(c_rarg2, sp); // pass address of regs on stack
+#ifndef PRODUCT
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+ BLOCK_COMMENT("call MacroAssembler::debug");
+ int32_t offset = 0;
+ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset);
+ __ jalr(x1, t0, offset);
+ __ ebreak();
+
+ return start;
+ }
+
+ // The inner part of zero_words().
+ //
+ // Inputs:
+ // x28: the HeapWord-aligned base address of an array to zero.
+ // x29: the count in HeapWords, x29 > 0.
+ //
+ // Returns x28 and x29, adjusted for the caller to clear.
+ // x28: the base address of the tail of words left to clear.
+ // x29: the number of words in the tail.
+ // x29 < MacroAssembler::zero_words_block_size.
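+ //
+ // Roughly equivalent C sketch of the generated loop (illustrative only,
+ // not emitted code):
+ //   while (cnt >= zero_words_block_size) {
+ //     for (int i = 0; i < zero_words_block_size; i++) { *base++ = 0; }
+ //     cnt -= zero_words_block_size;
+ //   }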
+
+ address generate_zero_blocks() {
+ Label done;
+
+ const Register base = x28, cnt = x29;
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "zero_blocks");
+ address start = __ pc();
+
+ {
+ // Clear the remaining blocks.
+ Label loop;
+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
+ __ bltz(cnt, done);
+ __ bind(loop);
+ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) {
+ __ sd(zr, Address(base, 0));
+ __ add(base, base, 8);
+ }
+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
+ __ bgez(cnt, loop);
+ __ bind(done);
+ __ add(cnt, cnt, MacroAssembler::zero_words_block_size);
+ }
+
+ __ ret();
+
+ return start;
+ }
+
+ typedef enum {
+ copy_forwards = 1,
+ copy_backwards = -1
+ } copy_direction;
+
+ // Bulk copy of blocks of 8 words.
+ //
+ // count is a count of words.
+ //
+ // Precondition: count >= 8
+ //
+ // Postconditions:
+ //
+ // The least significant bit of count contains the remaining count
+ // of words to copy. The rest of count is trash.
+ //
+ // s and d are adjusted to point to the remaining words to copy
+ //
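+ // The copy loop is software pipelined: eight words are pre-loaded, then each
+ // iteration stores the previous eight while loading the next eight. Sketch
+ // (illustrative, the register block written as t[0..7]):
+ //
+ //   load t[0..7] from s; s += 8 words; count -= 16;
+ //   while (count >= 0) {
+ //     store t[0..7] to d; load t[0..7] from s;
+ //     s += 8 words; d += 8 words; count -= 8;
+ //   }
+ //   store t[0..7] to d; d += 8 words;   // drain
+ //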
+ void generate_copy_longs(Label &start, Register s, Register d, Register count,
+ copy_direction direction) {
+ int unit = wordSize * direction;
+ int bias = wordSize;
+
+ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16,
+ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29;
+
+ const Register stride = x30;
+
+ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3,
+ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7);
+ assert_different_registers(s, d, count, t0);
+
+ Label again, drain;
+ const char* stub_name = NULL;
+ if (direction == copy_forwards) {
+ stub_name = "forward_copy_longs";
+ } else {
+ stub_name = "backward_copy_longs";
+ }
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ __ align(CodeEntryAlignment);
+ __ bind(start);
+
+ if (direction == copy_forwards) {
+ __ sub(s, s, bias);
+ __ sub(d, d, bias);
+ }
+
+#ifdef ASSERT
+ // Make sure we are never given < 8 words
+ {
+ Label L;
+
+ __ li(t0, 8);
+ __ bge(count, t0, L);
+ __ stop("genrate_copy_longs called with < 8 words");
+ __ bind(L);
+ }
+#endif
+
+ __ ld(tmp_reg0, Address(s, 1 * unit));
+ __ ld(tmp_reg1, Address(s, 2 * unit));
+ __ ld(tmp_reg2, Address(s, 3 * unit));
+ __ ld(tmp_reg3, Address(s, 4 * unit));
+ __ ld(tmp_reg4, Address(s, 5 * unit));
+ __ ld(tmp_reg5, Address(s, 6 * unit));
+ __ ld(tmp_reg6, Address(s, 7 * unit));
+ __ ld(tmp_reg7, Address(s, 8 * unit));
+ __ addi(s, s, 8 * unit);
+
+ __ sub(count, count, 16);
+ __ bltz(count, drain);
+
+ __ bind(again);
+
+ __ sd(tmp_reg0, Address(d, 1 * unit));
+ __ sd(tmp_reg1, Address(d, 2 * unit));
+ __ sd(tmp_reg2, Address(d, 3 * unit));
+ __ sd(tmp_reg3, Address(d, 4 * unit));
+ __ sd(tmp_reg4, Address(d, 5 * unit));
+ __ sd(tmp_reg5, Address(d, 6 * unit));
+ __ sd(tmp_reg6, Address(d, 7 * unit));
+ __ sd(tmp_reg7, Address(d, 8 * unit));
+
+ __ ld(tmp_reg0, Address(s, 1 * unit));
+ __ ld(tmp_reg1, Address(s, 2 * unit));
+ __ ld(tmp_reg2, Address(s, 3 * unit));
+ __ ld(tmp_reg3, Address(s, 4 * unit));
+ __ ld(tmp_reg4, Address(s, 5 * unit));
+ __ ld(tmp_reg5, Address(s, 6 * unit));
+ __ ld(tmp_reg6, Address(s, 7 * unit));
+ __ ld(tmp_reg7, Address(s, 8 * unit));
+
+ __ addi(s, s, 8 * unit);
+ __ addi(d, d, 8 * unit);
+
+ __ sub(count, count, 8);
+ __ bgez(count, again);
+
+ // Drain
+ __ bind(drain);
+
+ __ sd(tmp_reg0, Address(d, 1 * unit));
+ __ sd(tmp_reg1, Address(d, 2 * unit));
+ __ sd(tmp_reg2, Address(d, 3 * unit));
+ __ sd(tmp_reg3, Address(d, 4 * unit));
+ __ sd(tmp_reg4, Address(d, 5 * unit));
+ __ sd(tmp_reg5, Address(d, 6 * unit));
+ __ sd(tmp_reg6, Address(d, 7 * unit));
+ __ sd(tmp_reg7, Address(d, 8 * unit));
+ __ addi(d, d, 8 * unit);
+
+ {
+ Label L1, L2;
+ __ andi(t0, count, 4);
+ __ beqz(t0, L1);
+
+ __ ld(tmp_reg0, Address(s, 1 * unit));
+ __ ld(tmp_reg1, Address(s, 2 * unit));
+ __ ld(tmp_reg2, Address(s, 3 * unit));
+ __ ld(tmp_reg3, Address(s, 4 * unit));
+ __ addi(s, s, 4 * unit);
+
+ __ sd(tmp_reg0, Address(d, 1 * unit));
+ __ sd(tmp_reg1, Address(d, 2 * unit));
+ __ sd(tmp_reg2, Address(d, 3 * unit));
+ __ sd(tmp_reg3, Address(d, 4 * unit));
+ __ addi(d, d, 4 * unit);
+
+ __ bind(L1);
+
+ if (direction == copy_forwards) {
+ __ addi(s, s, bias);
+ __ addi(d, d, bias);
+ }
+
+ __ andi(t0, count, 2);
+ __ beqz(t0, L2);
+ if (direction == copy_backwards) {
+ __ addi(s, s, 2 * unit);
+ __ ld(tmp_reg0, Address(s));
+ __ ld(tmp_reg1, Address(s, wordSize));
+ __ addi(d, d, 2 * unit);
+ __ sd(tmp_reg0, Address(d));
+ __ sd(tmp_reg1, Address(d, wordSize));
+ } else {
+ __ ld(tmp_reg0, Address(s));
+ __ ld(tmp_reg1, Address(s, wordSize));
+ __ addi(s, s, 2 * unit);
+ __ sd(tmp_reg0, Address(d));
+ __ sd(tmp_reg1, Address(d, wordSize));
+ __ addi(d, d, 2 * unit);
+ }
+ __ bind(L2);
+ }
+
+ __ ret();
+ }
+
+ Label copy_f, copy_b;
+
+ // All-singing all-dancing memory copy.
+ //
+ // Copy count units of memory from s to d. The size of a unit is
+ // step, which can be positive or negative depending on the direction
+ // of copy. If is_aligned is false, we align the source address.
+ //
+ /*
+ * if (is_aligned) {
+ * goto copy_8_bytes;
+ * }
+ * bool is_backwards = step < 0;
+ * int granularity = uabs(step);
+ * count = count * granularity; * count bytes
+ *
+ * if (is_backwards) {
+ * s += count;
+ * d += count;
+ * }
+ *
+ * count limit may be greater than 16, for better performance
+ * if (count < 16) {
+ * goto copy_small;
+ * }
+ *
+ * if ((dst % 8) == (src % 8)) {
+ * aligned;
+ * goto copy8;
+ * }
+ *
+ * copy_small:
+ * load element one by one;
+ * done;
+ */
+
+ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp);
+
+ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) {
+ bool is_backward = step < 0;
+ int granularity = uabs(step);
+
+ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17;
+ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2);
+ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity);
+ Label loop_forward, loop_backward, done;
+
+ __ mv(dst, d);
+ __ mv(src, s);
+ __ mv(cnt, count);
+
+ __ bind(loop_forward);
+ __ vsetvli(vl, cnt, sew, Assembler::m8);
+ if (is_backward) {
+ __ bne(vl, cnt, loop_backward);
+ }
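+ // when vl == cnt the whole remaining copy fits in one strip, so even a
+ // backward request can safely take the forward path (the vector loads
+ // complete before the stores); otherwise branch to the tail-first
+ // backward loop bound below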
+
+ __ vlex_v(v0, src, sew);
+ __ sub(cnt, cnt, vl);
+ __ slli(vl, vl, (int)sew);
+ __ add(src, src, vl);
+
+ __ vsex_v(v0, dst, sew);
+ __ add(dst, dst, vl);
+ __ bnez(cnt, loop_forward);
+
+ if (is_backward) {
+ __ j(done);
+
+ __ bind(loop_backward);
+ __ sub(tmp, cnt, vl);
+ __ slli(tmp, tmp, sew);
+ __ add(tmp1, s, tmp);
+ __ vlex_v(v0, tmp1, sew);
+ __ add(tmp2, d, tmp);
+ __ vsex_v(v0, tmp2, sew);
+ __ sub(cnt, cnt, vl);
+ __ bnez(cnt, loop_forward);
+ __ bind(done);
+ }
+ }
+
+ void copy_memory(bool is_aligned, Register s, Register d,
+ Register count, Register tmp, int step) {
+ if (UseRVV) {
+ return copy_memory_v(s, d, count, tmp, step);
+ }
+
+ bool is_backwards = step < 0;
+ int granularity = uabs(step);
+
+ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17;
+
+ Label same_aligned;
+ Label copy8, copy_small, done;
+
+ copy_insn ld_arr = NULL, st_arr = NULL;
+ switch (granularity) {
+ case 1 :
+ ld_arr = (copy_insn)&MacroAssembler::lbu;
+ st_arr = (copy_insn)&MacroAssembler::sb;
+ break;
+ case 2 :
+ ld_arr = (copy_insn)&MacroAssembler::lhu;
+ st_arr = (copy_insn)&MacroAssembler::sh;
+ break;
+ case 4 :
+ ld_arr = (copy_insn)&MacroAssembler::lwu;
+ st_arr = (copy_insn)&MacroAssembler::sw;
+ break;
+ case 8 :
+ ld_arr = (copy_insn)&MacroAssembler::ld;
+ st_arr = (copy_insn)&MacroAssembler::sd;
+ break;
+ default :
+ ShouldNotReachHere();
+ }
+
+ __ beqz(count, done);
+ __ slli(cnt, count, exact_log2(granularity));
+ if (is_backwards) {
+ __ add(src, s, cnt);
+ __ add(dst, d, cnt);
+ } else {
+ __ mv(src, s);
+ __ mv(dst, d);
+ }
+
+ if (is_aligned) {
+ __ addi(tmp, cnt, -8);
+ __ bgez(tmp, copy8);
+ __ j(copy_small);
+ }
+
+ __ mv(tmp, 16);
+ __ blt(cnt, tmp, copy_small);
+
+ __ xorr(tmp, src, dst);
+ __ andi(tmp, tmp, 0b111);
+ __ bnez(tmp, copy_small);
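+ // src and dst can only reach mutual 8-byte alignment if they are
+ // congruent mod 8; otherwise fall back to element-wise copy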
+
+ __ bind(same_aligned);
+ __ andi(tmp, src, 0b111);
+ __ beqz(tmp, copy8);
+ if (is_backwards) {
+ __ addi(src, src, step);
+ __ addi(dst, dst, step);
+ }
+ (_masm->*ld_arr)(tmp3, Address(src), t0);
+ (_masm->*st_arr)(tmp3, Address(dst), t0);
+ if (!is_backwards) {
+ __ addi(src, src, step);
+ __ addi(dst, dst, step);
+ }
+ __ addi(cnt, cnt, -granularity);
+ __ beqz(cnt, done);
+ __ j(same_aligned);
+
+ __ bind(copy8);
+ if (is_backwards) {
+ __ addi(src, src, -wordSize);
+ __ addi(dst, dst, -wordSize);
+ }
+ __ ld(tmp3, Address(src));
+ __ sd(tmp3, Address(dst));
+ if (!is_backwards) {
+ __ addi(src, src, wordSize);
+ __ addi(dst, dst, wordSize);
+ }
+ __ addi(cnt, cnt, -wordSize);
+ __ addi(tmp4, cnt, -8);
+ __ bgez(tmp4, copy8); // cnt >= 8, do next loop
+
+ __ beqz(cnt, done);
+
+ __ bind(copy_small);
+ if (is_backwards) {
+ __ addi(src, src, step);
+ __ addi(dst, dst, step);
+ }
+ (_masm->*ld_arr)(tmp3, Address(src), t0);
+ (_masm->*st_arr)(tmp3, Address(dst), t0);
+ if (!is_backwards) {
+ __ addi(src, src, step);
+ __ addi(dst, dst, step);
+ }
+ __ addi(cnt, cnt, -granularity);
+ __ bgtz(cnt, copy_small);
+
+ __ bind(done);
+ }
+
+ // Scan over array at a for count oops, verifying each one.
+ // Preserves a and count, clobbers t0 and t1.
+ void verify_oop_array(size_t size, Register a, Register count, Register temp) {
+ Label loop, end;
+ __ mv(t1, zr);
+ __ slli(t0, count, exact_log2(size));
+ __ bind(loop);
+ __ bgeu(t1, t0, end);
+
+ __ add(temp, a, t1);
+ if (size == (size_t)wordSize) {
+ __ ld(temp, Address(temp, 0));
+ __ verify_oop(temp);
+ } else {
+ __ lwu(temp, Address(temp, 0));
+ __ decode_heap_oop(temp); // calls verify_oop
+ }
+ __ add(t1, t1, size);
+ __ j(loop);
+ __ bind(end);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // is_oop - true => oop array, so generate store check code
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+ // the hardware handle it. The two dwords within qwords that span
+ // cache line boundaries will still be loaded and stored atomically.
+ //
+ // Side Effects:
+ // disjoint_int_copy_entry is set to the no-overlap entry point
+ // used by generate_conjoint_int_oop_copy().
+ //
+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry,
+ const char* name, bool dest_uninitialized = false) {
+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+ RegSet saved_reg = RegSet::of(s, d, count);
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ __ enter();
+
+ if (entry != NULL) {
+ *entry = __ pc();
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ BLOCK_COMMENT("Entry:");
+ }
+
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
+ if (dest_uninitialized) {
+ decorators |= IS_DEST_UNINITIALIZED;
+ }
+ if (aligned) {
+ decorators |= ARRAYCOPY_ALIGNED;
+ }
+
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg);
+
+ if (is_oop) {
+ // save regs before copy_memory
+ __ push_reg(RegSet::of(d, count), sp);
+ }
+
+ {
+ // UnsafeCopyMemory page error: continue after ucm
+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
+ UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+ copy_memory(aligned, s, d, count, t0, size);
+ }
+
+ if (is_oop) {
+ __ pop_reg(RegSet::of(d, count), sp);
+ if (VerifyOops) {
+ verify_oop_array(size, d, count, t2);
+ }
+ }
+
+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet());
+
+ __ leave();
+ __ mv(x10, zr); // return 0
+ __ ret();
+ return start;
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // is_oop - true => oop array, so generate store check code
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+ // the hardware handle it. The two dwords within qwords that span
+ // cache line boundaries will still be loaded and stored atomically.
+ //
+ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
+ address* entry, const char* name,
+ bool dest_uninitialized = false) {
+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+ RegSet saved_regs = RegSet::of(s, d, count);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ __ enter();
+
+ if (entry != NULL) {
+ *entry = __ pc();
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ BLOCK_COMMENT("Entry:");
+ }
+
+ // use fwd copy when (d-s) above_equal (count*size)
+ __ sub(t0, d, s);
+ __ slli(t1, count, exact_log2(size));
+ __ bgeu(t0, t1, nooverlap_target);
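+ // the unsigned compare also covers d < s (the subtraction wraps to a
+ // large value), so both the non-overlapping and the dest-below-source
+ // cases take the forward-copy path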
+
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY;
+ if (dest_uninitialized) {
+ decorators |= IS_DEST_UNINITIALIZED;
+ }
+ if (aligned) {
+ decorators |= ARRAYCOPY_ALIGNED;
+ }
+
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs);
+
+ if (is_oop) {
+ // save regs before copy_memory
+ __ push_reg(RegSet::of(d, count), sp);
+ }
+
+ {
+ // UnsafeCopyMemory page error: continue after ucm
+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
+ UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+ copy_memory(aligned, s, d, count, t0, -size);
+ }
+
+ if (is_oop) {
+ __ pop_reg(RegSet::of(d, count), sp);
+ if (VerifyOops) {
+ verify_oop_array(size, d, count, t2);
+ }
+ }
+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet());
+ __ leave();
+ __ mv(x10, zr); // return 0
+ __ ret();
+ return start;
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+ // we let the hardware handle it. The one to eight bytes within words,
+ // dwords or qwords that span cache line boundaries will still be loaded
+ // and stored atomically.
+ //
+ // Side Effects:
+ // disjoint_byte_copy_entry is set to the no-overlap entry point
+ // used by generate_conjoint_byte_copy().
+ //
+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) {
+ const bool not_oop = false;
+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+ // we let the hardware handle it. The one to eight bytes within words,
+ // dwords or qwords that span cache line boundaries will still be loaded
+ // and stored atomically.
+ //
+ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
+ address* entry, const char* name) {
+ const bool not_oop = false;
+ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+ // let the hardware handle it. The two or four words within dwords
+ // or qwords that span cache line boundaries will still be loaded
+ // and stored atomically.
+ //
+ // Side Effects:
+ // disjoint_short_copy_entry is set to the no-overlap entry point
+ // used by generate_conjoint_short_copy().
+ //
+ address generate_disjoint_short_copy(bool aligned,
+ address* entry, const char* name) {
+ const bool not_oop = false;
+ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+ // let the hardware handle it. The two or four words within dwords
+ // or qwords that span cache line boundaries will still be loaded
+ // and stored atomically.
+ //
+ address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
+ address* entry, const char* name) {
+ const bool not_oop = false;
+ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+ // the hardware handle it. The two dwords within qwords that span
+ // cache line boundaries will still be loaded and stored atomically.
+ //
+ // Side Effects:
+ // disjoint_int_copy_entry is set to the no-overlap entry point
+ // used by generate_conjoint_int_oop_copy().
+ //
+ address generate_disjoint_int_copy(bool aligned, address* entry,
+ const char* name, bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+ // the hardware handle it. The two dwords within qwords that span
+ // cache line boundaries will still be loaded and stored atomically.
+ //
+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
+ address* entry, const char* name,
+ bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name);
+ }
+
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as size_t, can be zero
+ //
+ // Side Effects:
+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+ // no-overlap entry point used by generate_conjoint_long_oop_copy().
+ //
+ address generate_disjoint_long_copy(bool aligned, address* entry,
+ const char* name, bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as size_t, can be zero
+ //
+ address generate_conjoint_long_copy(bool aligned,
+ address nooverlap_target, address* entry,
+ const char* name, bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as size_t, can be zero
+ //
+ // Side Effects:
+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+ // no-overlap entry point used by generate_conjoint_long_oop_copy().
+ //
+ address generate_disjoint_oop_copy(bool aligned, address* entry,
+ const char* name, bool dest_uninitialized) {
+ const bool is_oop = true;
+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as size_t, can be zero
+ //
+ address generate_conjoint_oop_copy(bool aligned,
+ address nooverlap_target, address* entry,
+ const char* name, bool dest_uninitialized) {
+ const bool is_oop = true;
+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
+ name, dest_uninitialized);
+ }
+
+ // Helper for generating a dynamic type check.
+ // Smashes t0, t1.
+ void generate_type_check(Register sub_klass,
+ Register super_check_offset,
+ Register super_klass,
+ Label& L_success) {
+ assert_different_registers(sub_klass, super_check_offset, super_klass);
+
+ BLOCK_COMMENT("type_check:");
+
+ Label L_miss;
+
+ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset);
+ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
+
+ // Fall through on failure!
+ __ BIND(L_miss);
+ }
+
+ //
+ // Generate checkcasting array copy stub
+ //
+ // Input:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ // c_rarg3 - size_t ckoff (super_check_offset)
+ // c_rarg4 - oop ckval (super_klass)
+ //
+ // Output:
+ // x10 == 0 - success
+ // x10 == -1^K - failure, where K is partial transfer count
+ //
+ address generate_checkcast_copy(const char* name, address* entry,
+ bool dest_uninitialized = false) {
+ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
+
+ // Input registers (after setup_arg_regs)
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register count = c_rarg2; // elements count
+ const Register ckoff = c_rarg3; // super_check_offset
+ const Register ckval = c_rarg4; // super_klass
+
+ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
+ RegSet wb_post_saved_regs = RegSet::of(count);
+
+ // Registers used as temps (x7, x9, x18 are save-on-entry)
+ const Register count_save = x19; // orig elements count
+ const Register start_to = x18; // destination array start address
+ const Register copied_oop = x7; // actual oop copied
+ const Register r9_klass = x9; // oop._klass
+
+ //---------------------------------------------------------------
+ // Assembler stub will be used for this call to arraycopy
+ // if the two arrays are subtypes of Object[] but the
+ // destination array type is not equal to or a supertype
+ // of the source type. Each element must be separately
+ // checked.
+
+ assert_different_registers(from, to, count, ckoff, ckval, start_to,
+ copied_oop, r9_klass, count_save);
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ // Caller of this entry point must set up the argument registers.
+ if (entry != NULL) {
+ *entry = __ pc();
+ BLOCK_COMMENT("Entry:");
+ }
+
+ // Empty array: Nothing to do
+ __ beqz(count, L_done);
+
+ __ push_reg(RegSet::of(x7, x9, x18, x19), sp);
+
+#ifdef ASSERT
+ BLOCK_COMMENT("assert consistent ckoff/ckval");
+ // The ckoff and ckval must be mutually consistent,
+ // even though caller generates both.
+ { Label L;
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
+ __ lwu(start_to, Address(ckval, sco_offset));
+ __ beq(ckoff, start_to, L);
+ __ stop("super_check_offset inconsistent");
+ __ bind(L);
+ }
+#endif //ASSERT
+
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
+ bool is_oop = true;
+ if (dest_uninitialized) {
+ decorators |= IS_DEST_UNINITIALIZED;
+ }
+
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
+
+ // save the original count
+ __ mv(count_save, count);
+
+ // Copy from low to high addresses
+ __ mv(start_to, to); // Save destination array start address
+ __ j(L_load_element);
+
+ // ======== begin loop ========
+ // (Loop is rotated; its entry is L_load_element.)
+ // Loop control:
+ // for count to 0 do
+ // copied_oop = load_heap_oop(from++)
+ // ... generate_type_check ...
+ // store_heap_oop(to++, copied_oop)
+ // end
+
+ __ align(OptoLoopAlignment);
+
+ __ BIND(L_store_element);
+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop
+ __ add(to, to, UseCompressedOops ? 4 : 8);
+ __ sub(count, count, 1);
+ __ beqz(count, L_do_card_marks);
+
+ // ======== loop entry is here ========
+ __ BIND(L_load_element);
+ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop
+ __ add(from, from, UseCompressedOops ? 4 : 8);
+ __ beqz(copied_oop, L_store_element);
+
+ __ load_klass(r9_klass, copied_oop);// query the object klass
+ generate_type_check(r9_klass, ckoff, ckval, L_store_element);
+ // ======== end loop ========
+
+ // It was a real error; we must depend on the caller to finish the job.
+ // Register count = remaining oops, count_orig = total oops.
+ // Emit GC store barriers for the oops we have copied and report
+ // their number to the caller.
+
+ __ sub(count, count_save, count); // K = partially copied oop count
+ __ xori(count, count, -1); // report (-1^K) to caller
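+ // e.g. if 3 oops were copied before the failing element, x10 will hold
+ // ~3 == -4, which the caller decodes back into the partial transfer count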
+ __ beqz(count, L_done_pop);
+
+ __ BIND(L_do_card_marks);
+ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs);
+
+ __ bind(L_done_pop);
+ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp);
+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
+
+ __ bind(L_done);
+ __ mv(x10, count);
+ __ leave();
+ __ ret();
+
+ return start;
+ }
+
+ // Perform range checks on the proposed arraycopy.
+ // Kills temp, but nothing else.
+ // Also, clean the sign bits of src_pos and dst_pos.
+ void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
+ Register src_pos, // source position (c_rarg1)
+ Register dst, // destination array oop (c_rarg2)
+ Register dst_pos, // destination position (c_rarg3)
+ Register length,
+ Register temp,
+ Label& L_failed) {
+ BLOCK_COMMENT("arraycopy_range_checks:");
+
+ assert_different_registers(t0, temp);
+
+ // if [src_pos + length > arrayOop(src)->length()] then FAIL
+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes()));
+ __ addw(temp, length, src_pos);
+ __ bgtu(temp, t0, L_failed);
+
+ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL
+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes()));
+ __ addw(temp, length, dst_pos);
+ __ bgtu(temp, t0, L_failed);
+
+ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
+ __ zero_extend(src_pos, src_pos, 32);
+ __ zero_extend(dst_pos, dst_pos, 32);
+
+ BLOCK_COMMENT("arraycopy_range_checks done");
+ }
+
+ //
+ // Generate 'unsafe' array copy stub
+ // Though just as safe as the other stubs, it takes an unscaled
+ // size_t argument instead of an element count.
+ //
+ // Input:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - byte count, treated as ssize_t, can be zero
+ //
+ // Examines the alignment of the operands and dispatches
+ // to a long, int, short, or byte copy loop.
+ //
+ address generate_unsafe_copy(const char* name,
+ address byte_copy_entry,
+ address short_copy_entry,
+ address int_copy_entry,
+ address long_copy_entry) {
+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
+ int_copy_entry != NULL && long_copy_entry != NULL);
+ Label L_long_aligned, L_int_aligned, L_short_aligned;
+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ // bump this on entry, not on exit:
+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
+
+ __ orr(t0, s, d);
+ __ orr(t0, t0, count);
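+ // OR-ing source, destination and byte count means a low bit is set iff
+ // any of the three is misaligned at that granularity, so a single value
+ // drives the alignment dispatch below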
+
+ __ andi(t0, t0, BytesPerLong - 1);
+ __ beqz(t0, L_long_aligned);
+ __ andi(t0, t0, BytesPerInt - 1);
+ __ beqz(t0, L_int_aligned);
+ __ andi(t0, t0, 1);
+ __ beqz(t0, L_short_aligned);
+ __ j(RuntimeAddress(byte_copy_entry));
+
+ __ BIND(L_short_aligned);
+ __ srli(count, count, LogBytesPerShort); // size => short_count
+ __ j(RuntimeAddress(short_copy_entry));
+ __ BIND(L_int_aligned);
+ __ srli(count, count, LogBytesPerInt); // size => int_count
+ __ j(RuntimeAddress(int_copy_entry));
+ __ BIND(L_long_aligned);
+ __ srli(count, count, LogBytesPerLong); // size => long_count
+ __ j(RuntimeAddress(long_copy_entry));
+
+ return start;
+ }
+
+ //
+ // Generate generic array copy stubs
+ //
+ // Input:
+ // c_rarg0 - src oop
+ // c_rarg1 - src_pos (32-bits)
+ // c_rarg2 - dst oop
+ // c_rarg3 - dst_pos (32-bits)
+ // c_rarg4 - element count (32-bits)
+ //
+ // Output:
+ // x10 == 0 - success
+ // x10 == -1^K - failure, where K is partial transfer count
+ //
+ address generate_generic_copy(const char* name,
+ address byte_copy_entry, address short_copy_entry,
+ address int_copy_entry, address oop_copy_entry,
+ address long_copy_entry, address checkcast_copy_entry) {
+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
+ int_copy_entry != NULL && oop_copy_entry != NULL &&
+ long_copy_entry != NULL && checkcast_copy_entry != NULL);
+ Label L_failed, L_failed_0, L_objArray;
+ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
+
+ // Input registers
+ const Register src = c_rarg0; // source array oop
+ const Register src_pos = c_rarg1; // source position
+ const Register dst = c_rarg2; // destination array oop
+ const Register dst_pos = c_rarg3; // destination position
+ const Register length = c_rarg4;
+
+ // Registers used as temps
+ const Register dst_klass = c_rarg5;
+
+ __ align(CodeEntryAlignment);
+
+ StubCodeMark mark(this, "StubRoutines", name);
+
+ address start = __ pc();
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ // bump this on entry, not on exit:
+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
+
+ //-----------------------------------------------------------------------
+ // Assembler stub will be used for this call to arraycopy
+ // if the following conditions are met:
+ //
+ // (1) src and dst must not be null.
+ // (2) src_pos must not be negative.
+ // (3) dst_pos must not be negative.
+ // (4) length must not be negative.
+ // (5) src klass and dst klass should be the same and not NULL.
+ // (6) src and dst should be arrays.
+ // (7) src_pos + length must not exceed length of src.
+ // (8) dst_pos + length must not exceed length of dst.
+ //
+
+ // if [src == NULL] then return -1
+ __ beqz(src, L_failed);
+
+ // if [src_pos < 0] then return -1
+ // i.e. sign bit set
+ __ andi(t0, src_pos, 1UL << 31);
+ __ bnez(t0, L_failed);
+
+ // if [dst == NULL] then return -1
+ __ beqz(dst, L_failed);
+
+ // if [dst_pos < 0] then return -1
+ // i.e. sign bit set
+ __ andi(t0, dst_pos, 1UL << 31);
+ __ bnez(t0, L_failed);
+
+ // registers used as temp
+ const Register scratch_length = x28; // elements count to copy
+ const Register scratch_src_klass = x29; // array klass
+ const Register lh = x30; // layout helper
+
+ // if [length < 0] then return -1
+ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value)
+ // i.e. sign bit set
+ __ andi(t0, scratch_length, 1UL << 31);
+ __ bnez(t0, L_failed);
+
+ __ load_klass(scratch_src_klass, src);
+#ifdef ASSERT
+ {
+ BLOCK_COMMENT("assert klasses not null {");
+ Label L1, L2;
+ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL
+ __ bind(L1);
+ __ stop("broken null klass");
+ __ bind(L2);
+ __ load_klass(t0, dst);
+ __ beqz(t0, L1); // this would be broken also
+ BLOCK_COMMENT("} assert klasses not null done");
+ }
+#endif
+
+ // Load layout helper (32-bits)
+ //
+ //  |array_tag|     | header_size | element_type |     |log2_element_size|
+ // 32        30    24            16              8     2                 0
+ //
+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
+ //
+
+ const int lh_offset = in_bytes(Klass::layout_helper_offset());
+
+ // Handle objArrays completely differently...
+ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+ __ lw(lh, Address(scratch_src_klass, lh_offset));
+ __ mvw(t0, objArray_lh);
+ __ beq(lh, t0, L_objArray);
+
+ // if [src->klass() != dst->klass()] then return -1
+ __ load_klass(t1, dst);
+ __ bne(t1, scratch_src_klass, L_failed);
+
+ // if [!src->is_Array()] then return -1
+ // i.e. layout helper is non-negative (lh >= 0)
+ __ andi(t0, lh, 1UL << 31);
+ __ beqz(t0, L_failed);
+
+ // At this point, it is known to be a typeArray (array_tag 0x3).
+#ifdef ASSERT
+ {
+ BLOCK_COMMENT("assert primitive array {");
+ Label L;
+ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
+ __ bge(lh, t1, L);
+ __ stop("must be a primitive array");
+ __ bind(L);
+ BLOCK_COMMENT("} assert primitive array done");
+ }
+#endif
+
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+ t1, L_failed);
+
+ // TypeArrayKlass
+ //
+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize)
+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize)
+ //
+
+ const Register t0_offset = t0; // array offset
+ const Register x22_elsize = lh; // element size
+
+ // Get array_header_in_bytes()
+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1);
+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
+ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32;
+ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset
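+ // the shift pair above isolates the header_size field of lh, i.e. bits
+ // [lh_header_size_shift, lh_header_size_msb), as an unsigned byte offset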
+
+ __ add(src, src, t0_offset); // src array offset
+ __ add(dst, dst, t0_offset); // dst array offset
+ BLOCK_COMMENT("choose copy loop based on element size");
+
+ // next registers should be set before the jump to corresponding stub
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register count = c_rarg2; // elements count
+
+ // 'from', 'to', 'count' registers should be set in such order
+ // since they are the same as 'src', 'src_pos', 'dst'.
+
+ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
+
+ // The possible values of elsize are 0-3, i.e. exact_log2(element
+ // size in bytes). We do a simple bitwise binary search.
+ __ BIND(L_copy_bytes);
+ __ andi(t0, x22_elsize, 2);
+ __ bnez(t0, L_copy_ints);
+ __ andi(t0, x22_elsize, 1);
+ __ bnez(t0, L_copy_shorts);
+ __ add(from, src, src_pos); // src_addr
+ __ add(to, dst, dst_pos); // dst_addr
+ __ addw(count, scratch_length, zr); // length
+ __ j(RuntimeAddress(byte_copy_entry));
+
+ __ BIND(L_copy_shorts);
+ __ shadd(from, src_pos, src, t0, 1); // src_addr
+ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr
+ __ addw(count, scratch_length, zr); // length
+ __ j(RuntimeAddress(short_copy_entry));
+
+ __ BIND(L_copy_ints);
+ __ andi(t0, x22_elsize, 1);
+ __ bnez(t0, L_copy_longs);
+ __ shadd(from, src_pos, src, t0, 2); // src_addr
+ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr
+ __ addw(count, scratch_length, zr); // length
+ __ j(RuntimeAddress(int_copy_entry));
+
+ __ BIND(L_copy_longs);
+#ifdef ASSERT
+ {
+ BLOCK_COMMENT("assert long copy {");
+ Label L;
+ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize
+ __ addw(lh, lh, zr);
+ __ mvw(t0, LogBytesPerLong);
+ __ beq(x22_elsize, t0, L);
+ __ stop("must be long copy, but elsize is wrong");
+ __ bind(L);
+ BLOCK_COMMENT("} assert long copy done");
+ }
+#endif
+ __ shadd(from, src_pos, src, t0, 3); // src_addr
+ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr
+ __ addw(count, scratch_length, zr); // length
+ __ j(RuntimeAddress(long_copy_entry));
+
+ // ObjArrayKlass
+ __ BIND(L_objArray);
+ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
+
+ Label L_plain_copy, L_checkcast_copy;
+ // test array classes for subtyping
+ __ load_klass(t2, dst);
+ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality
+
+ // Identically typed arrays can be copied without element-wise checks.
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+ t1, L_failed);
+
+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+ __ addw(count, scratch_length, zr); // length
+ __ BIND(L_plain_copy);
+ __ j(RuntimeAddress(oop_copy_entry));
+
+ __ BIND(L_checkcast_copy);
+ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass)
+ {
+ // Before looking at dst.length, make sure dst is also an objArray.
+ __ lwu(t0, Address(t2, lh_offset));
+ __ mvw(t1, objArray_lh);
+ __ bne(t0, t1, L_failed);
+
+ // It is safe to examine both src.length and dst.length.
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+ t2, L_failed);
+
+ __ load_klass(dst_klass, dst); // reload
+
+ // Marshal the base address arguments now, freeing registers.
+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+ __ addw(count, length, zr); // length (reloaded)
+ const Register sco_temp = c_rarg3; // this register is free now
+ assert_different_registers(from, to, count, sco_temp,
+ dst_klass, scratch_src_klass);
+
+ // Generate the type check.
+ const int sco_offset = in_bytes(Klass::super_check_offset_offset());
+ __ lwu(sco_temp, Address(dst_klass, sco_offset));
+
+ // Smashes t0, t1
+ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);
+
+ // Fetch destination element klass from the ObjArrayKlass header.
+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+ __ ld(dst_klass, Address(dst_klass, ek_offset));
+ __ lwu(sco_temp, Address(dst_klass, sco_offset));
+
+ // the checkcast_copy loop needs two extra arguments:
+ assert(c_rarg3 == sco_temp, "#3 already in place");
+ // Set up arguments for checkcast_copy_entry.
+ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass
+ __ j(RuntimeAddress(checkcast_copy_entry));
+ }
+
+ __ BIND(L_failed);
+ __ li(x10, -1);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret();
+
+ return start;
+ }
+
+ //
+ // Generate stub for array fill. If "aligned" is true, the
+ // "to" address is assumed to be heapword aligned.
+ //
+ // Arguments for generated stub:
+ // to: c_rarg0
+ // value: c_rarg1
+ // count: c_rarg2 treated as signed
+ //
+ address generate_fill(BasicType t, bool aligned, const char* name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ BLOCK_COMMENT("Entry:");
+
+ const Register to = c_rarg0; // source array address
+ const Register value = c_rarg1; // value
+ const Register count = c_rarg2; // elements count
+
+ const Register bz_base = x28; // base for block_zero routine
+ const Register cnt_words = x29; // temp register
+ const Register tmp_reg = t1;
+
+ __ enter();
+
+ Label L_fill_elements, L_exit1;
+
+ int shift = -1;
+ switch (t) {
+ case T_BYTE:
+ shift = 0;
+
+ // Zero extend value
+ // 8 bit -> 16 bit
+ __ andi(value, value, 0xff);
+ __ mv(tmp_reg, value);
+ __ slli(tmp_reg, tmp_reg, 8);
+ __ orr(value, value, tmp_reg);
+
+ // 16 bit -> 32 bit
+ __ mv(tmp_reg, value);
+ __ slli(tmp_reg, tmp_reg, 16);
+ __ orr(value, value, tmp_reg);
+
+ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element
+ __ bltu(count, tmp_reg, L_fill_elements);
+ break;
+ case T_SHORT:
+ shift = 1;
+ // Zero extend value
+ // 16 bit -> 32 bit
+ __ andi(value, value, 0xffff);
+ __ mv(tmp_reg, value);
+ __ slli(tmp_reg, tmp_reg, 16);
+ __ orr(value, value, tmp_reg);
+
+ // Short arrays (< 8 bytes) fill by element
+ __ mv(tmp_reg, 8 >> shift);
+ __ bltu(count, tmp_reg, L_fill_elements);
+ break;
+ case T_INT:
+ shift = 2;
+
+ // Short arrays (< 8 bytes) fill by element
+ __ mv(tmp_reg, 8 >> shift);
+ __ bltu(count, tmp_reg, L_fill_elements);
+ break;
+ default: ShouldNotReachHere();
+ }
+
+ // Align source address at 8 bytes address boundary.
+ Label L_skip_align1, L_skip_align2, L_skip_align4;
+ if (!aligned) {
+ switch (t) {
+ case T_BYTE:
+ // One byte misalignment happens only for byte arrays.
+ __ andi(t0, to, 1);
+ __ beqz(t0, L_skip_align1);
+ __ sb(value, Address(to, 0));
+ __ addi(to, to, 1);
+ __ addiw(count, count, -1);
+ __ bind(L_skip_align1);
+ // Fallthrough
+ case T_SHORT:
+ // Two bytes misalignment happens only for byte and short (char) arrays.
+ __ andi(t0, to, 2);
+ __ beqz(t0, L_skip_align2);
+ __ sh(value, Address(to, 0));
+ __ addi(to, to, 2);
+ __ addiw(count, count, -(2 >> shift));
+ __ bind(L_skip_align2);
+ // Fallthrough
+ case T_INT:
+ // Align to 8 bytes, we know we are 4 byte aligned to start.
+ __ andi(t0, to, 4);
+ __ beqz(t0, L_skip_align4);
+ __ sw(value, Address(to, 0));
+ __ addi(to, to, 4);
+ __ addiw(count, count, -(4 >> shift));
+ __ bind(L_skip_align4);
+ break;
+ default: ShouldNotReachHere();
+ }
+ }
+
+ //
+ // Fill large chunks
+ //
+ __ srliw(cnt_words, count, 3 - shift); // number of words
+
+ // 32 bit -> 64 bit
+ __ andi(value, value, 0xffffffff);
+ __ mv(tmp_reg, value);
+ __ slli(tmp_reg, tmp_reg, 32);
+ __ orr(value, value, tmp_reg);
+
+ __ slli(tmp_reg, cnt_words, 3 - shift);
+ __ subw(count, count, tmp_reg);
+ {
+ __ fill_words(to, cnt_words, value);
+ }
+
+ // Remaining count is less than 8 bytes. Fill it by a single store.
+ // Note that the total length is no less than 8 bytes.
+ if (t == T_BYTE || t == T_SHORT) {
+ __ beqz(count, L_exit1);
+ __ shadd(to, count, to, tmp_reg, shift); // points to the end
+ __ sd(value, Address(to, -8)); // overwrite some elements
+ __ bind(L_exit1);
+ __ leave();
+ __ ret();
+ }
+
+ // Handle fills of less than 8 bytes.
+ Label L_fill_2, L_fill_4, L_exit2;
+ __ bind(L_fill_elements);
+ switch (t) {
+ case T_BYTE:
+ __ andi(t0, count, 1);
+ __ beqz(t0, L_fill_2);
+ __ sb(value, Address(to, 0));
+ __ addi(to, to, 1);
+ __ bind(L_fill_2);
+ __ andi(t0, count, 2);
+ __ beqz(t0, L_fill_4);
+ __ sh(value, Address(to, 0));
+ __ addi(to, to, 2);
+ __ bind(L_fill_4);
+ __ andi(t0, count, 4);
+ __ beqz(t0, L_exit2);
+ __ sw(value, Address(to, 0));
+ break;
+ case T_SHORT:
+ __ andi(t0, count, 1);
+ __ beqz(t0, L_fill_4);
+ __ sh(value, Address(to, 0));
+ __ addi(to, to, 2);
+ __ bind(L_fill_4);
+ __ andi(t0, count, 2);
+ __ beqz(t0, L_exit2);
+ __ sw(value, Address(to, 0));
+ break;
+ case T_INT:
+ __ beqz(count, L_exit2);
+ __ sw(value, Address(to, 0));
+ break;
+ default: ShouldNotReachHere();
+ }
+ __ bind(L_exit2);
+ __ leave();
+ __ ret();
+ return start;
+ }
+
+ void generate_arraycopy_stubs() {
+ address entry = NULL;
+ address entry_jbyte_arraycopy = NULL;
+ address entry_jshort_arraycopy = NULL;
+ address entry_jint_arraycopy = NULL;
+ address entry_oop_arraycopy = NULL;
+ address entry_jlong_arraycopy = NULL;
+ address entry_checkcast_arraycopy = NULL;
+
+ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards);
+ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards);
+
+ StubRoutines::riscv::_zero_blocks = generate_zero_blocks();
+
+ //*** jbyte
+ // Always need aligned and unaligned versions
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
+ "jbyte_disjoint_arraycopy");
+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry,
+ &entry_jbyte_arraycopy,
+ "jbyte_arraycopy");
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
+ "arrayof_jbyte_disjoint_arraycopy");
+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
+ "arrayof_jbyte_arraycopy");
+
+ //*** jshort
+ // Always need aligned and unaligned versions
+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
+ "jshort_disjoint_arraycopy");
+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
+ &entry_jshort_arraycopy,
+ "jshort_arraycopy");
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
+ "arrayof_jshort_disjoint_arraycopy");
+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
+ "arrayof_jshort_arraycopy");
+
+ //*** jint
+ // Aligned versions
+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
+ "arrayof_jint_disjoint_arraycopy");
+ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
+ "arrayof_jint_arraycopy");
+ // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
+ // entry_jint_arraycopy always points to the unaligned version
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
+ "jint_disjoint_arraycopy");
+ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
+ &entry_jint_arraycopy,
+ "jint_arraycopy");
+
+ //*** jlong
+ // It is always aligned
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
+ "arrayof_jlong_disjoint_arraycopy");
+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
+ "arrayof_jlong_arraycopy");
+ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
+
+ //*** oops
+ {
+ // With compressed oops we need unaligned versions; notice that
+ // we overwrite entry_oop_arraycopy.
+ bool aligned = !UseCompressedOops;
+
+ StubRoutines::_arrayof_oop_disjoint_arraycopy
+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
+ /*dest_uninitialized*/false);
+ StubRoutines::_arrayof_oop_arraycopy
+ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
+ /*dest_uninitialized*/false);
+ // Aligned versions without pre-barriers
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
+ /*dest_uninitialized*/true);
+ StubRoutines::_arrayof_oop_arraycopy_uninit
+ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
+ /*dest_uninitialized*/true);
+ }
+
+ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
+ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
+ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
+
+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+ /*dest_uninitialized*/true);
+
+
+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
+ entry_jbyte_arraycopy,
+ entry_jshort_arraycopy,
+ entry_jint_arraycopy,
+ entry_jlong_arraycopy);
+
+ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
+ entry_jbyte_arraycopy,
+ entry_jshort_arraycopy,
+ entry_jint_arraycopy,
+ entry_oop_arraycopy,
+ entry_jlong_arraycopy,
+ entry_checkcast_arraycopy);
+
+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+ }
+
+ // Safefetch stubs.
+ void generate_safefetch(const char* name, int size, address* entry,
+ address* fault_pc, address* continuation_pc) {
+ // safefetch signatures:
+ // int SafeFetch32(int* adr, int errValue)
+ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue)
+ //
+ // arguments:
+ // c_rarg0 = adr
+ // c_rarg1 = errValue
+ //
+ // result:
+ // x10 = *adr or errValue
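+ //
+ // Illustrative use from VM code (hypothetical pointer "p"):
+ // int v = SafeFetch32(p, -1); // yields -1 instead of faulting if p is unmapped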
+ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL);
+ StubCodeMark mark(this, "StubRoutines", name);
+
+ // Entry point, pc or function descriptor.
+ *entry = __ pc();
+
+ // Load *adr into c_rarg1, may fault.
+ *fault_pc = __ pc();
+ switch (size) {
+ case 4:
+ // int32_t
+ __ lw(c_rarg1, Address(c_rarg0, 0));
+ break;
+ case 8:
+ // int64_t
+ __ ld(c_rarg1, Address(c_rarg0, 0));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ // return errValue or *adr
+ *continuation_pc = __ pc();
+ __ mv(x10, c_rarg1);
+ __ ret();
+ }
+
+ // code for comparing 16 bytes of strings with same encoding
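+ // The sequence is software-pipelined: the pair already in tmp1/tmp2 (loaded by
+ // the caller or by the previous iteration) is compared while the next pair is
+ // loaded into tmp5/cnt1, and vice versa.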
+ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
+ __ ld(tmp5, Address(str1));
+ __ addi(str1, str1, 8);
+ __ xorr(tmp4, tmp1, tmp2);
+ __ ld(cnt1, Address(str2));
+ __ addi(str2, str2, 8);
+ __ bnez(tmp4, DIFF1);
+ __ ld(tmp1, Address(str1));
+ __ addi(str1, str1, 8);
+ __ xorr(tmp4, tmp5, cnt1);
+ __ ld(tmp2, Address(str2));
+ __ addi(str2, str2, 8);
+ __ bnez(tmp4, DIFF2);
+ }
+
+ // code for comparing 8 characters of strings with Latin1 and UTF-16 encoding
+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
+ Label &DIFF2) {
+ const Register strU = x12, curU = x7, strL = x29, tmp = x30;
+ __ ld(tmpL, Address(strL));
+ __ addi(strL, strL, 8);
+ __ ld(tmpU, Address(strU));
+ __ addi(strU, strU, 8);
+ __ inflate_lo32(tmp, tmpL);
+ __ mv(t0, tmp);
+ __ xorr(tmp, curU, t0);
+ __ bnez(tmp, DIFF2);
+
+ __ ld(curU, Address(strU));
+ __ addi(strU, strU, 8);
+ __ inflate_hi32(tmp, tmpL);
+ __ mv(t0, tmp);
+ __ xorr(tmp, tmpU, t0);
+ __ bnez(tmp, DIFF1);
+ }
+
+ // x10 = result
+ // x11 = str1
+ // x12 = cnt1
+ // x13 = str2
+ // x14 = cnt2
+ // x28 = tmp1
+ // x29 = tmp2
+ // x30 = tmp3
+ address generate_compare_long_string_different_encoding(bool isLU) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", isLU ? "compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL");
+ address entry = __ pc();
+ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
+ DONE, CALCULATE_DIFFERENCE;
+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
+ RegSet spilled_regs = RegSet::of(tmp4, tmp5);
+
+ // cnt2 == amount of characters left to compare
+ // Check already loaded first 4 symbols
+ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2);
+ __ mv(isLU ? tmp1 : tmp2, tmp3);
+ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize);
+ __ addi(str2, str2, isLU ? wordSize : wordSize / 2);
+ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is a special case.
+ __ push_reg(spilled_regs, sp);
+
+ if (isLU) {
+ __ add(str1, str1, cnt2);
+ __ shadd(str2, cnt2, str2, t0, 1);
+ } else {
+ __ shadd(str1, cnt2, str1, t0, 1);
+ __ add(str2, str2, cnt2);
+ }
+ __ xorr(tmp3, tmp1, tmp2);
+ __ mv(tmp5, tmp2);
+ __ bnez(tmp3, CALCULATE_DIFFERENCE);
+
+ Register strU = isLU ? str2 : str1,
+ strL = isLU ? str1 : str2,
+ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison
+ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison
+
+ __ sub(tmp2, strL, cnt2); // strL pointer to load from
+ __ slli(t0, cnt2, 1);
+ __ sub(cnt1, strU, t0); // strU pointer to load from
+
+ __ ld(tmp4, Address(cnt1));
+ __ addi(cnt1, cnt1, 8);
+ __ beqz(cnt2, LOAD_LAST); // no characters left except last load
+ __ sub(cnt2, cnt2, 16);
+ __ bltz(cnt2, TAIL);
+ __ bind(SMALL_LOOP); // smaller loop
+ __ sub(cnt2, cnt2, 16);
+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+ __ bgez(cnt2, SMALL_LOOP);
+ __ addi(t0, cnt2, 16);
+ __ beqz(t0, LOAD_LAST);
+ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
+ // Address of 8 bytes before last 4 characters in UTF-16 string
+ __ shadd(cnt1, cnt2, cnt1, t0, 1);
+ // Address of 16 bytes before last 4 characters in Latin1 string
+ __ add(tmp2, tmp2, cnt2);
+ __ ld(tmp4, Address(cnt1, -8));
+ // last 16 characters before last load
+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+ __ j(LOAD_LAST);
+ __ bind(DIFF2);
+ __ mv(tmpU, tmp4);
+ __ bind(DIFF1);
+ __ mv(tmpL, t0);
+ __ j(CALCULATE_DIFFERENCE);
+ __ bind(LOAD_LAST);
+ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU.
+ // No need to load them again
+ __ mv(tmpU, tmp4);
+ __ ld(tmpL, Address(strL));
+ __ inflate_lo32(tmp3, tmpL);
+ __ mv(tmpL, tmp3);
+ __ xorr(tmp3, tmpU, tmpL);
+ __ beqz(tmp3, DONE);
+
+ // Find the first different characters in the longwords and
+ // compute their difference.
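+ // tmp3 holds the XOR of the two (inflated) 8-byte chunks being compared: locate
+ // its lowest set bit, shift both operands right so the mismatching character
+ // lands in the low 16 bits, then mask and subtract to form the signed result.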
+ __ bind(CALCULATE_DIFFERENCE);
+ __ ctzc_bit(tmp4, tmp3);
+ __ srl(tmp1, tmp1, tmp4);
+ __ srl(tmp5, tmp5, tmp4);
+ __ andi(tmp1, tmp1, 0xFFFF);
+ __ andi(tmp5, tmp5, 0xFFFF);
+ __ sub(result, tmp1, tmp5);
+ __ bind(DONE);
+ __ pop_reg(spilled_regs, sp);
+ __ ret();
+ return entry;
+ }
+
+ address generate_method_entry_barrier() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
+
+ Label deoptimize_label;
+
+ address start = __ pc();
+
+ __ set_last_Java_frame(sp, fp, ra, t0);
+
+ __ enter();
+ __ add(t1, sp, wordSize);
+
+ __ sub(sp, sp, 4 * wordSize);
+
+ __ push_call_clobbered_registers();
+
+ __ mv(c_rarg0, t1);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1);
+
+ __ reset_last_Java_frame(true);
+
+ __ mv(t0, x10);
+
+ __ pop_call_clobbered_registers();
+
+ __ bnez(t0, deoptimize_label);
+
+ __ leave();
+ __ ret();
+
+ __ BIND(deoptimize_label);
+
+ __ ld(t0, Address(sp, 0));
+ __ ld(fp, Address(sp, wordSize));
+ __ ld(ra, Address(sp, wordSize * 2));
+ __ ld(t1, Address(sp, wordSize * 3));
+
+ __ mv(sp, t0);
+ __ jr(t1);
+
+ return start;
+ }
+
+ // x10 = result
+ // x11 = str1
+ // x12 = cnt1
+ // x13 = str2
+ // x14 = cnt2
+ // x28 = tmp1
+ // x29 = tmp2
+ // x30 = tmp3
+ // x31 = tmp4
+ address generate_compare_long_string_same_encoding(bool isLL) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", isLL ?
+ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU");
+ address entry = __ pc();
+ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL,
+ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF;
+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
+ RegSet spilled_regs = RegSet::of(tmp4, tmp5);
+
+ // cnt1/cnt2 contain the number of characters to compare. cnt1 can be re-used
+ // update cnt2 counter with already loaded 8 bytes
+ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2));
+ // update pointers, because of previous read
+ __ add(str1, str1, wordSize);
+ __ add(str2, str2, wordSize);
+ // less than 16 bytes left?
+ __ sub(cnt2, cnt2, isLL ? 16 : 8);
+ __ push_reg(spilled_regs, sp);
+ __ bltz(cnt2, TAIL);
+ __ bind(SMALL_LOOP);
+ compare_string_16_bytes_same(DIFF, DIFF2);
+ __ sub(cnt2, cnt2, isLL ? 16 : 8);
+ __ bgez(cnt2, SMALL_LOOP);
+ __ bind(TAIL);
+ __ addi(cnt2, cnt2, isLL ? 16 : 8);
+ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF);
+ __ sub(cnt2, cnt2, isLL ? 8 : 4);
+ __ blez(cnt2, CHECK_LAST);
+ __ xorr(tmp4, tmp1, tmp2);
+ __ bnez(tmp4, DIFF);
+ __ ld(tmp1, Address(str1));
+ __ addi(str1, str1, 8);
+ __ ld(tmp2, Address(str2));
+ __ addi(str2, str2, 8);
+ __ sub(cnt2, cnt2, isLL ? 8 : 4);
+ __ bind(CHECK_LAST);
+ if (!isLL) {
+ __ add(cnt2, cnt2, cnt2); // now in bytes
+ }
+ __ xorr(tmp4, tmp1, tmp2);
+ __ bnez(tmp4, DIFF);
+ __ add(str1, str1, cnt2);
+ __ ld(tmp5, Address(str1));
+ __ add(str2, str2, cnt2);
+ __ ld(cnt1, Address(str2));
+ __ xorr(tmp4, tmp5, cnt1);
+ __ beqz(tmp4, LENGTH_DIFF);
+ // Find the first different characters in the longwords and
+ // compute their difference.
+ __ bind(DIFF2);
+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
+ __ srl(tmp5, tmp5, tmp3);
+ __ srl(cnt1, cnt1, tmp3);
+ if (isLL) {
+ __ andi(tmp5, tmp5, 0xFF);
+ __ andi(cnt1, cnt1, 0xFF);
+ } else {
+ __ andi(tmp5, tmp5, 0xFFFF);
+ __ andi(cnt1, cnt1, 0xFFFF);
+ }
+ __ sub(result, tmp5, cnt1);
+ __ j(LENGTH_DIFF);
+ __ bind(DIFF);
+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
+ __ srl(tmp1, tmp1, tmp3);
+ __ srl(tmp2, tmp2, tmp3);
+ if (isLL) {
+ __ andi(tmp1, tmp1, 0xFF);
+ __ andi(tmp2, tmp2, 0xFF);
+ } else {
+ __ andi(tmp1, tmp1, 0xFFFF);
+ __ andi(tmp2, tmp2, 0xFFFF);
+ }
+ __ sub(result, tmp1, tmp2);
+ __ j(LENGTH_DIFF);
+ __ bind(LAST_CHECK_AND_LENGTH_DIFF);
+ __ xorr(tmp4, tmp1, tmp2);
+ __ bnez(tmp4, DIFF);
+ __ bind(LENGTH_DIFF);
+ __ pop_reg(spilled_regs, sp);
+ __ ret();
+ return entry;
+ }
+
+ void generate_compare_long_strings() {
+ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true);
+ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false);
+ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true);
+ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false);
+ }
+
+ // x10 result
+ // x11 src
+ // x12 src count
+ // x13 pattern
+ // x14 pattern count
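+ //
+ // The stub scans the haystack one 8-byte chunk at a time: compute_match_mask
+ // (see below) flags candidate positions of the first needle character, and each
+ // candidate is then verified against the rest of the needle character by
+ // character.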
+ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL)
+ {
+ const char* stubName = needle_isL
+ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul")
+ : "indexof_linear_uu";
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stubName);
+ address entry = __ pc();
+
+ int needle_chr_size = needle_isL ? 1 : 2;
+ int haystack_chr_size = haystack_isL ? 1 : 2;
+ int needle_chr_shift = needle_isL ? 0 : 1;
+ int haystack_chr_shift = haystack_isL ? 0 : 1;
+ bool isL = needle_isL && haystack_isL;
+ // parameters
+ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14;
+ // temporary registers
+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25;
+ // redefinitions
+ Register ch1 = x28, ch2 = x29;
+ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29);
+
+ __ push_reg(spilled_regs, sp);
+
+ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
+ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
+ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
+ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
+ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
+ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
+
+ __ ld(ch1, Address(needle));
+ __ ld(ch2, Address(haystack));
+ // src.length - pattern.length
+ __ sub(haystack_len, haystack_len, needle_len);
+
+ // first is needle[0]
+ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first);
+ uint64_t mask0101 = UCONST64(0x0101010101010101);
+ uint64_t mask0001 = UCONST64(0x0001000100010001);
+ __ mv(mask1, haystack_isL ? mask0101 : mask0001);
+ __ mul(first, first, mask1);
+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f);
+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff);
+ __ mv(mask2, haystack_isL ? mask7f7f : mask7fff);
+ if (needle_isL != haystack_isL) {
+ __ mv(tmp, ch1);
+ }
+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1);
+ __ blez(haystack_len, L_SMALL);
+
+ if (needle_isL != haystack_isL) {
+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros);
+ }
+ // xorr, sub, orr, notr, andr
+ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i]
+ // eg:
+ // first: aa aa aa aa aa aa aa aa
+ // ch2: aa aa li nx jd ka aa aa
+ // match_mask: 80 80 00 00 00 00 80 80
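+ // i.e. the classic SWAR "zero lane" test applied to x = ch2 ^ first:
+ // match_mask = (x - mask1) & ~(x | mask2)
+ // A set 0x80/0x8000 bit marks a candidate occurrence of needle[0]; candidates
+ // are still verified character by character below.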
+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2);
+
+ // search first char of needle, if success, goto L_HAS_ZERO;
+ __ bnez(match_mask, L_HAS_ZERO);
+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
+ __ add(result, result, wordSize / haystack_chr_size);
+ __ add(haystack, haystack, wordSize);
+ __ bltz(haystack_len, L_POST_LOOP);
+
+ __ bind(L_LOOP);
+ __ ld(ch2, Address(haystack));
+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2);
+ __ bnez(match_mask, L_HAS_ZERO);
+
+ __ bind(L_LOOP_PROCEED);
+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
+ __ add(haystack, haystack, wordSize);
+ __ add(result, result, wordSize / haystack_chr_size);
+ __ bgez(haystack_len, L_LOOP);
+
+ __ bind(L_POST_LOOP);
+ __ mv(ch2, -wordSize / haystack_chr_size);
+ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check
+ __ ld(ch2, Address(haystack));
+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
+ __ neg(haystack_len, haystack_len);
+ __ xorr(ch2, first, ch2);
+ __ sub(match_mask, ch2, mask1);
+ __ orr(ch2, ch2, mask2);
+ __ mv(trailing_zeros, -1); // all bits set
+ __ j(L_SMALL_PROCEED);
+
+ __ align(OptoLoopAlignment);
+ __ bind(L_SMALL);
+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
+ __ neg(haystack_len, haystack_len);
+ if (needle_isL != haystack_isL) {
+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros);
+ }
+ __ xorr(ch2, first, ch2);
+ __ sub(match_mask, ch2, mask1);
+ __ orr(ch2, ch2, mask2);
+ __ mv(trailing_zeros, -1); // all bits set
+
+ __ bind(L_SMALL_PROCEED);
+ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits.
+ __ notr(ch2, ch2);
+ __ andr(match_mask, match_mask, ch2);
+ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check
+ __ beqz(match_mask, NOMATCH);
+
+ __ bind(L_SMALL_HAS_ZERO_LOOP);
+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros
+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+ __ mv(ch2, wordSize / haystack_chr_size);
+ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2);
+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+ __ mv(trailing_zeros, wordSize / haystack_chr_size);
+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
+
+ __ bind(L_SMALL_CMP_LOOP);
+ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift);
+ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift);
+ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first));
+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
+ __ add(trailing_zeros, trailing_zeros, 1);
+ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP);
+ __ beq(first, ch2, L_SMALL_CMP_LOOP);
+
+ __ bind(L_SMALL_CMP_LOOP_NOMATCH);
+ __ beqz(match_mask, NOMATCH);
+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+ __ add(result, result, 1);
+ __ add(haystack, haystack, haystack_chr_size);
+ __ j(L_SMALL_HAS_ZERO_LOOP);
+
+ __ align(OptoLoopAlignment);
+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP);
+ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH);
+ __ j(DONE);
+
+ __ align(OptoLoopAlignment);
+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2);
+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
+ __ j(DONE);
+
+ __ align(OptoLoopAlignment);
+ __ bind(L_HAS_ZERO);
+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2);
+ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits)
+ __ sub(result, result, 1); // array index from 0, so result -= 1
+
+ __ bind(L_HAS_ZERO_LOOP);
+ __ mv(needle_len, wordSize / haystack_chr_size);
+ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2);
+ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2);
+ // load next 8 bytes from haystack, and increase result index
+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+ __ add(result, result, 1);
+ __ mv(trailing_zeros, wordSize / haystack_chr_size);
+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
+
+ // compare one char
+ __ bind(L_CMP_LOOP);
+ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift);
+ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len));
+ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift);
+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
+ __ add(trailing_zeros, trailing_zeros, 1); // next char index
+ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2);
+ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP);
+ __ beq(needle_len, ch2, L_CMP_LOOP);
+
+ __ bind(L_CMP_LOOP_NOMATCH);
+ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH);
+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index
+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+ __ add(haystack, haystack, haystack_chr_size);
+ __ j(L_HAS_ZERO_LOOP);
+
+ __ align(OptoLoopAlignment);
+ __ bind(L_CMP_LOOP_LAST_CMP);
+ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH);
+ __ j(DONE);
+
+ __ align(OptoLoopAlignment);
+ __ bind(L_CMP_LOOP_LAST_CMP2);
+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+ __ add(result, result, 1);
+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
+ __ j(DONE);
+
+ __ align(OptoLoopAlignment);
+ __ bind(L_HAS_ZERO_LOOP_NOMATCH);
+ // 1) Restore the "result" index. The index was wordSize/str2_chr_size * N until
+ // the L_HAS_ZERO block. The byte octet was analyzed in L_HAS_ZERO_LOOP, so
+ // result was increased by at most wordSize/str2_chr_size - 1 and the
+ // respective high bits were not changed. L_LOOP_PROCEED will increase result
+ // by the number of analyzed characters, so we can simply reset the lower bits
+ // of result here: clear the 2 lower bits for UU/UL and the 3 lower bits for LL.
+ // 2) Restore the needle_len and haystack_len values from the "compressed" haystack_len.
+ // 3) Advance haystack to the next haystack octet. result & 7 (LL) or result & 3
+ // (UU/UL) is the index of the last analyzed substring inside the current octet,
+ // so haystack currently points at the respective start address and needs to be
+ // advanced to the next octet.
+ __ andi(match_mask, result, wordSize / haystack_chr_size - 1);
+ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2);
+ __ andi(result, result, haystack_isL ? -8 : -4);
+ __ slli(tmp, match_mask, haystack_chr_shift);
+ __ sub(haystack, haystack, tmp);
+ __ addw(haystack_len, haystack_len, zr);
+ __ j(L_LOOP_PROCEED);
+
+ __ align(OptoLoopAlignment);
+ __ bind(NOMATCH);
+ __ mv(result, -1);
+
+ __ bind(DONE);
+ __ pop_reg(spilled_regs, sp);
+ __ ret();
+ return entry;
+ }
+
+ void generate_string_indexof_stubs()
+ {
+ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true);
+ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false);
+ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false);
+ }
+
+#ifdef COMPILER2
+ address generate_mulAdd()
+ {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "mulAdd");
+
+ address entry = __ pc();
+
+ const Register out = x10;
+ const Register in = x11;
+ const Register offset = x12;
+ const Register len = x13;
+ const Register k = x14;
+ const Register tmp = x28;
+
+ BLOCK_COMMENT("Entry:");
+ __ enter();
+ __ mul_add(out, in, offset, len, k, tmp);
+ __ leave();
+ __ ret();
+
+ return entry;
+ }
+
+ /**
+ * Arguments:
+ *
+ * Input:
+ * c_rarg0 - x address
+ * c_rarg1 - x length
+ * c_rarg2 - y address
+ * c_rarg3 - y length
+ * c_rarg4 - z address
+ * c_rarg5 - z length
+ */
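+ // Computes z = x * y on int-array magnitudes, as used by the
+ // java.math.BigInteger.multiplyToLen intrinsic; the actual work is done by
+ // MacroAssembler::multiply_to_len.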
+ address generate_multiplyToLen()
+ {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+ address entry = __ pc();
+
+ const Register x = x10;
+ const Register xlen = x11;
+ const Register y = x12;
+ const Register ylen = x13;
+ const Register z = x14;
+ const Register zlen = x15;
+
+ const Register tmp1 = x16;
+ const Register tmp2 = x17;
+ const Register tmp3 = x7;
+ const Register tmp4 = x28;
+ const Register tmp5 = x29;
+ const Register tmp6 = x30;
+ const Register tmp7 = x31;
+
+ BLOCK_COMMENT("Entry:");
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret();
+
+ return entry;
+ }
+
+ address generate_squareToLen()
+ {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "squareToLen");
+ address entry = __ pc();
+
+ const Register x = x10;
+ const Register xlen = x11;
+ const Register z = x12;
+ const Register zlen = x13;
+ const Register y = x14; // == x
+ const Register ylen = x15; // == xlen
+
+ const Register tmp1 = x16;
+ const Register tmp2 = x17;
+ const Register tmp3 = x7;
+ const Register tmp4 = x28;
+ const Register tmp5 = x29;
+ const Register tmp6 = x30;
+ const Register tmp7 = x31;
+
+ BLOCK_COMMENT("Entry:");
+ __ enter();
+ __ mv(y, x);
+ __ mv(ylen, xlen);
+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ __ leave();
+ __ ret();
+
+ return entry;
+ }
+
+ // Arguments:
+ //
+ // Input:
+ // c_rarg0 - newArr address
+ // c_rarg1 - oldArr address
+ // c_rarg2 - newIdx
+ // c_rarg3 - shiftCount
+ // c_rarg4 - numIter
+ //
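+ // Per-element scalar equivalent (illustrative; the loop below does this with
+ // 32-bit vector elements, vl elements per iteration):
+ // newArr[newIdx + i] = (oldArr[i] << shiftCount) | (oldArr[i + 1] >>> (32 - shiftCount));
+ //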
+ address generate_bigIntegerLeftShift() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
+ address entry = __ pc();
+
+ Label loop, exit;
+
+ Register newArr = c_rarg0;
+ Register oldArr = c_rarg1;
+ Register newIdx = c_rarg2;
+ Register shiftCount = c_rarg3;
+ Register numIter = c_rarg4;
+
+ Register shiftRevCount = c_rarg5;
+ Register oldArrNext = t1;
+
+ __ beqz(numIter, exit);
+ __ shadd(newArr, newIdx, newArr, t0, 2);
+
+ __ li(shiftRevCount, 32);
+ __ sub(shiftRevCount, shiftRevCount, shiftCount);
+
+ __ bind(loop);
+ __ addi(oldArrNext, oldArr, 4);
+ __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4);
+ __ vle32_v(v0, oldArr);
+ __ vle32_v(v4, oldArrNext);
+ __ vsll_vx(v0, v0, shiftCount);
+ __ vsrl_vx(v4, v4, shiftRevCount);
+ __ vor_vv(v0, v0, v4);
+ __ vse32_v(v0, newArr);
+ __ sub(numIter, numIter, t0);
+ __ shadd(oldArr, t0, oldArr, t1, 2);
+ __ shadd(newArr, t0, newArr, t1, 2);
+ __ bnez(numIter, loop);
+
+ __ bind(exit);
+ __ ret();
+
+ return entry;
+ }
+
+ // Arguments:
+ //
+ // Input:
+ // c_rarg0 - newArr address
+ // c_rarg1 - oldArr address
+ // c_rarg2 - newIdx
+ // c_rarg3 - shiftCount
+ // c_rarg4 - numIter
+ //
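+ // Per-element scalar equivalent (illustrative; the loop below walks from the
+ // high end using 32-bit vector elements):
+ // newArr[newIdx + i] = (oldArr[i + 1] >>> shiftCount) | (oldArr[i] << (32 - shiftCount));
+ //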
+ address generate_bigIntegerRightShift() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
+ address entry = __ pc();
+
+ Label loop, exit;
+
+ Register newArr = c_rarg0;
+ Register oldArr = c_rarg1;
+ Register newIdx = c_rarg2;
+ Register shiftCount = c_rarg3;
+ Register numIter = c_rarg4;
+ Register idx = numIter;
+
+ Register shiftRevCount = c_rarg5;
+ Register oldArrNext = c_rarg6;
+ Register newArrCur = t0;
+ Register oldArrCur = t1;
+
+ __ beqz(idx, exit);
+ __ shadd(newArr, newIdx, newArr, t0, 2);
+
+ __ li(shiftRevCount, 32);
+ __ sub(shiftRevCount, shiftRevCount, shiftCount);
+
+ __ bind(loop);
+ __ vsetvli(t0, idx, Assembler::e32, Assembler::m4);
+ __ sub(idx, idx, t0);
+ __ shadd(oldArrNext, idx, oldArr, t1, 2);
+ __ shadd(newArrCur, idx, newArr, t1, 2);
+ __ addi(oldArrCur, oldArrNext, 4);
+ __ vle32_v(v0, oldArrCur);
+ __ vle32_v(v4, oldArrNext);
+ __ vsrl_vx(v0, v0, shiftCount);
+ __ vsll_vx(v4, v4, shiftRevCount);
+ __ vor_vv(v0, v0, v4);
+ __ vse32_v(v0, newArrCur);
+ __ bnez(idx, loop);
+
+ __ bind(exit);
+ __ ret();
+
+ return entry;
+ }
+#endif
+
+#ifdef COMPILER2
+ class MontgomeryMultiplyGenerator : public MacroAssembler {
+
+ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
+ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj;
+
+ RegSet _toSave;
+ bool _squaring;
+
+ public:
+ MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
+ : MacroAssembler(as->code()), _squaring(squaring) {
+
+ // Register allocation
+
+ Register reg = c_rarg0;
+ Pa_base = reg; // Argument registers
+ if (squaring) {
+ Pb_base = Pa_base;
+ } else {
+ Pb_base = ++reg;
+ }
+ Pn_base = ++reg;
+ Rlen= ++reg;
+ inv = ++reg;
+ Pm_base = ++reg;
+
+ // Working registers:
+ Ra = ++reg; // The current digit of a, b, n, and m.
+ Rb = ++reg;
+ Rm = ++reg;
+ Rn = ++reg;
+
+ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m.
+ Pb = ++reg;
+ Pm = ++reg;
+ Pn = ++reg;
+
+ tmp0 = ++reg; // Three registers which form a
+ tmp1 = ++reg; // triple-precision accumulator.
+ tmp2 = ++reg;
+
+ Ri = x6; // Inner and outer loop indexes.
+ Rj = x7;
+
+ Rhi_ab = x28; // Product registers: low and high parts
+ Rlo_ab = x29; // of a*b and m*n.
+ Rhi_mn = x30;
+ Rlo_mn = x31;
+
+ // x18 and up are callee-saved.
+ _toSave = RegSet::range(x18, reg) + Pm_base;
+ }
+
+ private:
+ void save_regs() {
+ push_reg(_toSave, sp);
+ }
+
+ void restore_regs() {
+ pop_reg(_toSave, sp);
+ }
+
+ template<typename T>
+ void unroll_2(Register count, T block) {
+ Label loop, end, odd;
+ beqz(count, end);
+ andi(t0, count, 0x1);
+ bnez(t0, odd);
+ align(16);
+ bind(loop);
+ (this->*block)();
+ bind(odd);
+ (this->*block)();
+ addi(count, count, -2);
+ bgtz(count, loop);
+ bind(end);
+ }
+
+ template<typename T>
+ void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
+ Label loop, end, odd;
+ beqz(count, end);
+ andi(tmp, count, 0x1);
+ bnez(tmp, odd);
+ align(16);
+ bind(loop);
+ (this->*block)(d, s, tmp);
+ bind(odd);
+ (this->*block)(d, s, tmp);
+ addi(count, count, -2);
+ bgtz(count, loop);
+ bind(end);
+ }
+
+ void pre1(RegisterOrConstant i) {
+ block_comment("pre1");
+ // Pa = Pa_base;
+ // Pb = Pb_base + i;
+ // Pm = Pm_base;
+ // Pn = Pn_base + i;
+ // Ra = *Pa;
+ // Rb = *Pb;
+ // Rm = *Pm;
+ // Rn = *Pn;
+ if (i.is_register()) {
+ slli(t0, i.as_register(), LogBytesPerWord);
+ } else {
+ mv(t0, i.as_constant());
+ slli(t0, t0, LogBytesPerWord);
+ }
+
+ mv(Pa, Pa_base);
+ add(Pb, Pb_base, t0);
+ mv(Pm, Pm_base);
+ add(Pn, Pn_base, t0);
+
+ ld(Ra, Address(Pa));
+ ld(Rb, Address(Pb));
+ ld(Rm, Address(Pm));
+ ld(Rn, Address(Pn));
+
+ // Zero the m*n result.
+ mv(Rhi_mn, zr);
+ mv(Rlo_mn, zr);
+ }
+
+ // The core multiply-accumulate step of a Montgomery
+ // multiplication. The idea is to schedule operations as a
+ // pipeline so that instructions with long latencies (loads and
+ // multiplies) have time to complete before their results are
+ // used. This most benefits in-order implementations of the
+ // architecture but out-of-order ones also benefit.
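+ // In the comments below, MACC(a, b, t0, t1, t2) denotes the multiply-accumulate
+ // (t2:t1:t0) += a * b on the triple-word accumulator held in tmp2:tmp1:tmp0.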
+ void step() {
+ block_comment("step");
+ // MACC(Ra, Rb, tmp0, tmp1, tmp2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ mulhu(Rhi_ab, Ra, Rb);
+ mul(Rlo_ab, Ra, Rb);
+ addi(Pa, Pa, wordSize);
+ ld(Ra, Address(Pa));
+ addi(Pb, Pb, -wordSize);
+ ld(Rb, Address(Pb));
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the
+ // previous iteration.
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ mulhu(Rhi_mn, Rm, Rn);
+ mul(Rlo_mn, Rm, Rn);
+ addi(Pm, Pm, wordSize);
+ ld(Rm, Address(Pm));
+ addi(Pn, Pn, -wordSize);
+ ld(Rn, Address(Pn));
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
+ }
+
+ void post1() {
+ block_comment("post1");
+
+ // MACC(Ra, Rb, tmp0, tmp1, tmp2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ mulhu(Rhi_ab, Ra, Rb);
+ mul(Rlo_ab, Ra, Rb);
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
+
+ // *Pm = Rm = tmp0 * inv;
+ mul(Rm, tmp0, inv);
+ sd(Rm, Address(Pm));
+
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+ mulhu(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
+ {
+ mul(Rlo_mn, Rm, Rn);
+ add(Rlo_mn, tmp0, Rlo_mn);
+ Label ok;
+ beqz(Rlo_mn, ok);
+ stop("broken Montgomery multiply");
+ bind(ok);
+ }
+#endif
+ // We have very carefully set things up so that
+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
+ // the lower half of Rm * Rn because we know the result already:
+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff
+ // tmp0 != 0. So, rather than do a mul and a cad we just set
+ // the carry flag iff tmp0 is nonzero.
+ //
+ // mul(Rlo_mn, Rm, Rn);
+ // cad(zr, tmp0, Rlo_mn);
+ addi(t0, tmp0, -1);
+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
+ cadc(tmp0, tmp1, Rhi_mn, t0);
+ adc(tmp1, tmp2, zr, t0);
+ mv(tmp2, zr);
+ }
+
+ void pre2(Register i, Register len) {
+ block_comment("pre2");
+ // Pa = Pa_base + i-len;
+ // Pb = Pb_base + len;
+ // Pm = Pm_base + i-len;
+ // Pn = Pn_base + len;
+
+ sub(Rj, i, len);
+ // Rj == i-len
+
+ // Ra as temp register
+ slli(Ra, Rj, LogBytesPerWord);
+ add(Pa, Pa_base, Ra);
+ add(Pm, Pm_base, Ra);
+ slli(Ra, len, LogBytesPerWord);
+ add(Pb, Pb_base, Ra);
+ add(Pn, Pn_base, Ra);
+
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ add(Pa, Pa, wordSize);
+ ld(Ra, Address(Pa));
+ add(Pb, Pb, -wordSize);
+ ld(Rb, Address(Pb));
+ add(Pm, Pm, wordSize);
+ ld(Rm, Address(Pm));
+ add(Pn, Pn, -wordSize);
+ ld(Rn, Address(Pn));
+
+ mv(Rhi_mn, zr);
+ mv(Rlo_mn, zr);
+ }
+
+ void post2(Register i, Register len) {
+ block_comment("post2");
+ sub(Rj, i, len);
+
+ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part
+
+ // As soon as we know the least significant digit of our result,
+ // store it.
+ // Pm_base[i-len] = tmp0;
+ // Rj as temp register
+ slli(Rj, Rj, LogBytesPerWord);
+ add(Rj, Pm_base, Rj);
+ sd(tmp0, Address(Rj));
+
+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part
+ adc(tmp1, tmp2, zr, t0);
+ mv(tmp2, zr);
+ }
+
+ // A carry in tmp0 after Montgomery multiplication means that we
+ // should subtract multiples of n from our result in m. We'll
+ // keep doing that until there is no carry.
+ void normalize(Register len) {
+ block_comment("normalize");
+ // while (tmp0)
+ // tmp0 = sub(Pm_base, Pn_base, tmp0, len);
+ Label loop, post, again;
+ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now
+ beqz(tmp0, post); {
+ bind(again); {
+ mv(i, zr);
+ mv(cnt, len);
+ slli(Rn, i, LogBytesPerWord);
+ add(Rm, Pm_base, Rn);
+ ld(Rm, Address(Rm));
+ add(Rn, Pn_base, Rn);
+ ld(Rn, Address(Rn));
+ li(t0, 1); // set carry flag, i.e. no borrow
+ align(16);
+ bind(loop); {
+ notr(Rn, Rn);
+ add(Rm, Rm, t0);
+ add(Rm, Rm, Rn);
+ sltu(t0, Rm, Rn);
+ slli(Rn, i, LogBytesPerWord); // Rn as temp register
+ add(Rn, Pm_base, Rn);
+ sd(Rm, Address(Rn));
+ add(i, i, 1);
+ slli(Rn, i, LogBytesPerWord);
+ add(Rm, Pm_base, Rn);
+ ld(Rm, Address(Rm));
+ add(Rn, Pn_base, Rn);
+ ld(Rn, Address(Rn));
+ sub(cnt, cnt, 1);
+ } bnez(cnt, loop);
+ addi(tmp0, tmp0, -1);
+ add(tmp0, tmp0, t0);
+ } bnez(tmp0, again);
+ } bind(post);
+ }
+
+ // Move memory at s to d, reversing words.
+ // Increments d to end of copied memory
+ // Destroys tmp1, tmp2
+ // Preserves len
+ // Leaves s pointing to the address which was in d at start
+ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
+ assert(tmp1 < x28 && tmp2 < x28, "register corruption");
+
+ slli(tmp1, len, LogBytesPerWord);
+ add(s, s, tmp1);
+ mv(tmp1, len);
+ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2);
+ slli(tmp1, len, LogBytesPerWord);
+ sub(s, d, tmp1);
+ }
+ // [63...0] -> [31...0][63...32]
+ void reverse1(Register d, Register s, Register tmp) {
+ addi(s, s, -wordSize);
+ ld(tmp, Address(s));
+ ror_imm(tmp, tmp, 32, t0);
+ sd(tmp, Address(d));
+ addi(d, d, wordSize);
+ }
+
+ void step_squaring() {
+ // An extra ACC
+ step();
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
+ }
+
+ void last_squaring(Register i) {
+ Label dont;
+ // if ((i & 1) == 0) {
+ andi(t0, i, 0x1);
+ bnez(t0, dont); {
+ // MACC(Ra, Rb, tmp0, tmp1, tmp2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ mulhu(Rhi_ab, Ra, Rb);
+ mul(Rlo_ab, Ra, Rb);
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
+ } bind(dont);
+ }
+
+ void extra_step_squaring() {
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
+
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ mulhu(Rhi_mn, Rm, Rn);
+ mul(Rlo_mn, Rm, Rn);
+ addi(Pm, Pm, wordSize);
+ ld(Rm, Address(Pm));
+ addi(Pn, Pn, -wordSize);
+ ld(Rn, Address(Pn));
+ }
+
+ void post1_squaring() {
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
+
+ // *Pm = Rm = tmp0 * inv;
+ mul(Rm, tmp0, inv);
+ sd(Rm, Address(Pm));
+
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+ mulhu(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
+ {
+ mul(Rlo_mn, Rm, Rn);
+ add(Rlo_mn, tmp0, Rlo_mn);
+ Label ok;
+ beqz(Rlo_mn, ok); {
+ stop("broken Montgomery multiply");
+ } bind(ok);
+ }
+#endif
+ // We have very carefully set things up so that
+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
+ // the lower half of Rm * Rn because we know the result already:
+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff
+ // tmp0 != 0. So, rather than do a mul and a cad we just set
+ // the carry flag iff tmp0 is nonzero.
+ //
+ // mul(Rlo_mn, Rm, Rn);
+ // cad(zr, tmp0, Rlo_mn);
+ addi(t0, tmp0, -1);
+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
+ cadc(tmp0, tmp1, Rhi_mn, t0);
+ adc(tmp1, tmp2, zr, t0);
+ mv(tmp2, zr);
+ }
+
+ // add the 128-bit value Rhi:Rlo into the triple-word accumulator (tmp2:tmp1:tmp0); t0 is used as the carry
+ void acc(Register Rhi, Register Rlo,
+ Register tmp0, Register tmp1, Register tmp2) {
+ cad(tmp0, tmp0, Rlo, t0);
+ cadc(tmp1, tmp1, Rhi, t0);
+ adc(tmp2, tmp2, zr, t0);
+ }
+
+ public:
+ /**
+ * Fast Montgomery multiplication. The derivation of the
+ * algorithm is in A Cryptographic Library for the Motorola
+ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
+ *
+ * Arguments:
+ *
+ * Inputs for multiplication:
+ * c_rarg0 - int array elements a
+ * c_rarg1 - int array elements b
+ * c_rarg2 - int array elements n (the modulus)
+ * c_rarg3 - int length
+ * c_rarg4 - int inv
+ * c_rarg5 - int array elements m (the result)
+ *
+ * Inputs for squaring:
+ * c_rarg0 - int array elements a
+ * c_rarg1 - int array elements n (the modulus)
+ * c_rarg2 - int length
+ * c_rarg3 - int inv
+ * c_rarg4 - int array elements m (the result)
+ *
+ */
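+ // In outline (an illustrative summary, ignoring the register pipelining): for
+ // i < len the code accumulates sum_j a[j]*b[i-j] and sum_j m[j]*n[i-j] into the
+ // triple-word accumulator, picks m[i] = acc.lo * inv so that the low word
+ // cancels, and shifts the accumulator down one word (post1). For
+ // len <= i < 2*len it finishes the remaining partial products and stores each
+ // resulting word back into m (post2). normalize() then subtracts n while a
+ // carry remains.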
+ address generate_multiply() {
+ Label argh, nothing;
+ bind(argh);
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+ align(CodeEntryAlignment);
+ address entry = pc();
+
+ beqz(Rlen, nothing);
+
+ enter();
+
+ // Make room.
+ li(Ra, 512);
+ bgt(Rlen, Ra, argh);
+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint)));
+ sub(Ra, sp, Ra);
+ andi(sp, Ra, -2 * wordSize);
+
+ srliw(Rlen, Rlen, 1); // length in longwords = len/2
+
+ {
+ // Copy input args, reversing as we go. We use Ra as a
+ // temporary variable.
+ reverse(Ra, Pa_base, Rlen, Ri, Rj);
+ if (!_squaring)
+ reverse(Ra, Pb_base, Rlen, Ri, Rj);
+ reverse(Ra, Pn_base, Rlen, Ri, Rj);
+ }
+
+ // Push all call-saved registers and also Pm_base which we'll need
+ // at the end.
+ save_regs();
+
+#ifndef PRODUCT
+ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+ {
+ ld(Rn, Address(Pn_base));
+ mul(Rlo_mn, Rn, inv);
+ li(t0, -1);
+ Label ok;
+ beq(Rlo_mn, t0, ok);
+ stop("broken inverse in Montgomery multiply");
+ bind(ok);
+ }
+#endif
+
+ mv(Pm_base, Ra);
+
+ mv(tmp0, zr);
+ mv(tmp1, zr);
+ mv(tmp2, zr);
+
+ block_comment("for (int i = 0; i < len; i++) {");
+ mv(Ri, zr); {
+ Label loop, end;
+ bge(Ri, Rlen, end);
+
+ bind(loop);
+ pre1(Ri);
+
+ block_comment(" for (j = i; j; j--) {"); {
+ mv(Rj, Ri);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+ } block_comment(" } // j");
+
+ post1();
+ addw(Ri, Ri, 1);
+ blt(Ri, Rlen, loop);
+ bind(end);
+ block_comment("} // i");
+ }
+
+ block_comment("for (int i = len; i < 2*len; i++) {");
+ mv(Ri, Rlen); {
+ Label loop, end;
+ slli(t0, Rlen, 1);
+ bge(Ri, t0, end);
+
+ bind(loop);
+ pre2(Ri, Rlen);
+
+ block_comment(" for (j = len*2-i-1; j; j--) {"); {
+ slliw(Rj, Rlen, 1);
+ subw(Rj, Rj, Ri);
+ subw(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+ } block_comment(" } // j");
+
+ post2(Ri, Rlen);
+ addw(Ri, Ri, 1);
+ slli(t0, Rlen, 1);
+ blt(Ri, t0, loop);
+ bind(end);
+ }
+ block_comment("} // i");
+
+ normalize(Rlen);
+
+ mv(Ra, Pm_base); // Save Pm_base in Ra
+ restore_regs(); // Restore caller's Pm_base
+
+ // Copy our result into caller's Pm_base
+ reverse(Pm_base, Ra, Rlen, Ri, Rj);
+
+ leave();
+ bind(nothing);
+ ret();
+
+ return entry;
+ }
+
+ /**
+ *
+ * Arguments:
+ *
+ * Inputs:
+ * c_rarg0 - int array elements a
+ * c_rarg1 - int array elements n (the modulus)
+ * c_rarg2 - int length
+ * c_rarg3 - int inv
+ * c_rarg4 - int array elements m (the result)
+ *
+ */
+ address generate_square() {
+ Label argh;
+ bind(argh);
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+ align(CodeEntryAlignment);
+ address entry = pc();
+
+ enter();
+
+ // Make room.
+ li(Ra, 512);
+ bgt(Rlen, Ra, argh);
+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint)));
+ sub(Ra, sp, Ra);
+ andi(sp, Ra, -2 * wordSize);
+
+ srliw(Rlen, Rlen, 1); // length in longwords = len/2
+
+ {
+ // Copy input args, reversing as we go. We use Ra as a
+ // temporary variable.
+ reverse(Ra, Pa_base, Rlen, Ri, Rj);
+ reverse(Ra, Pn_base, Rlen, Ri, Rj);
+ }
+
+ // Push all call-saved registers and also Pm_base which we'll need
+ // at the end.
+ save_regs();
+
+ mv(Pm_base, Ra);
+
+ mv(tmp0, zr);
+ mv(tmp1, zr);
+ mv(tmp2, zr);
+
+ block_comment("for (int i = 0; i < len; i++) {");
+ mv(Ri, zr); {
+ Label loop, end;
+ bind(loop);
+ bge(Ri, Rlen, end);
+
+ pre1(Ri);
+
+ block_comment("for (j = (i+1)/2; j; j--) {"); {
+ addi(Rj, Ri, 1);
+ srliw(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+ } block_comment(" } // j");
+
+ last_squaring(Ri);
+
+ block_comment(" for (j = i/2; j; j--) {"); {
+ srliw(Rj, Ri, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+ } block_comment(" } // j");
+
+ post1_squaring();
+ addi(Ri, Ri, 1);
+ blt(Ri, Rlen, loop);
+
+ bind(end);
+ block_comment("} // i");
+ }
+
+ block_comment("for (int i = len; i < 2*len; i++) {");
+ mv(Ri, Rlen); {
+ Label loop, end;
+ bind(loop);
+ slli(t0, Rlen, 1);
+ bge(Ri, t0, end);
+
+ pre2(Ri, Rlen);
+
+ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); {
+ slli(Rj, Rlen, 1);
+ sub(Rj, Rj, Ri);
+ sub(Rj, Rj, 1);
+ srliw(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+ } block_comment(" } // j");
+
+ last_squaring(Ri);
+
+ block_comment(" for (j = (2*len-i)/2; j; j--) {"); {
+ slli(Rj, Rlen, 1);
+ sub(Rj, Rj, Ri);
+ srliw(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+ } block_comment(" } // j");
+
+ post2(Ri, Rlen);
+ addi(Ri, Ri, 1);
+ slli(t0, Rlen, 1);
+ blt(Ri, t0, loop);
+
+ bind(end);
+ block_comment("} // i");
+ }
+
+ normalize(Rlen);
+
+ mv(Ra, Pm_base); // Save Pm_base in Ra
+ restore_regs(); // Restore caller's Pm_base
+
+ // Copy our result into caller's Pm_base
+ reverse(Pm_base, Ra, Rlen, Ri, Rj);
+
+ leave();
+ ret();
+
+ return entry;
+ }
+ };
+#endif // COMPILER2
+
+ // Continuation point for throwing of implicit exceptions that are
+ // not handled in the current activation. Fabricates an exception
+ // oop and initiates normal exception dispatching in this
+ // frame. Since we need to preserve callee-saved values (currently
+ // only for C2, but done for C1 as well) we need a callee-saved oop
+ // map and therefore have to make these stubs into RuntimeStubs
+ // rather than BufferBlobs. If the compiler needs all registers to
+ // be preserved between the fault point and the exception handler
+ // then it must assume responsibility for that in
+ // AbstractCompiler::continuation_for_implicit_null_exception or
+ // continuation_for_implicit_division_by_zero_exception. All other
+ // implicit exceptions (e.g., NullPointerException or
+ // AbstractMethodError on entry) are either at call sites or
+ // otherwise assume that stack unwinding will be initiated, so
+ // caller saved registers were assumed volatile in the compiler.
+
+#undef __
+#define __ masm->
+
+ address generate_throw_exception(const char* name,
+ address runtime_entry,
+ Register arg1 = noreg,
+ Register arg2 = noreg) {
+ // Information about frame layout at time of blocking runtime call.
+ // Note that we only have to preserve callee-saved registers since
+ // the compilers are responsible for supplying a continuation point
+ // if they expect all registers to be preserved.
+ // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0
+ assert_cond(runtime_entry != NULL);
+ enum layout {
+ fp_off = 0,
+ fp_off2,
+ return_off,
+ return_off2,
+ framesize // inclusive of return address
+ };
+
+ const int insts_size = 512;
+ const int locs_size = 64;
+
+ CodeBuffer code(name, insts_size, locs_size);
+ OopMapSet* oop_maps = new OopMapSet();
+ MacroAssembler* masm = new MacroAssembler(&code);
+ assert_cond(oop_maps != NULL && masm != NULL);
+
+ address start = __ pc();
+
+ // This is an inlined and slightly modified version of call_VM
+ // which has the ability to fetch the return PC out of
+ // thread-local storage and also sets up last_Java_sp slightly
+ // differently than the real call_VM
+
+ __ enter(); // Save FP and RA before call
+
+ assert(is_even(framesize / 2), "sp not 16-byte aligned");
+
+ // ra and fp are already in place
+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog
+
+ int frame_complete = __ pc() - start;
+
+ // Set up last_Java_sp and last_Java_fp
+ address the_pc = __ pc();
+ __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+ // Call runtime
+ if (arg1 != noreg) {
+ assert(arg2 != c_rarg1, "clobbered");
+ __ mv(c_rarg1, arg1);
+ }
+ if (arg2 != noreg) {
+ __ mv(c_rarg2, arg2);
+ }
+ __ mv(c_rarg0, xthread);
+ BLOCK_COMMENT("call runtime_entry");
+ int32_t offset = 0;
+ __ movptr_with_offset(t0, runtime_entry, offset);
+ __ jalr(x1, t0, offset);
+
+ // Generate oop map
+ OopMap* map = new OopMap(framesize, 0);
+ assert_cond(map != NULL);
+
+ oop_maps->add_gc_map(the_pc - start, map);
+
+ __ reset_last_Java_frame(true);
+
+ __ leave();
+
+ // check for pending exceptions
+#ifdef ASSERT
+ Label L;
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+ __ bnez(t0, L);
+ __ should_not_reach_here();
+ __ bind(L);
+#endif // ASSERT
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+
+ // codeBlob framesize is in words (not VMRegImpl::slot_size)
+ RuntimeStub* stub =
+ RuntimeStub::new_runtime_stub(name,
+ &code,
+ frame_complete,
+ (framesize >> (LogBytesPerWord - LogBytesPerInt)),
+ oop_maps, false);
+ assert(stub != NULL, "create runtime stub fail!");
+ return stub->entry_point();
+ }
+
+ // Initialization
+ void generate_initial() {
+ // Generate initial stubs and initialize the entry points
+
+ // Entry points that exist on all platforms. Note: this is code
+ // that could be shared among different platforms - however the
+ // benefit seems to be smaller than the disadvantage of having a
+ // much more complicated generator structure. See also the comment in
+ // stubRoutines.hpp.
+
+ StubRoutines::_forward_exception_entry = generate_forward_exception();
+
+ StubRoutines::_call_stub_entry =
+ generate_call_stub(StubRoutines::_call_stub_return_address);
+
+ // is referenced by megamorphic call
+ StubRoutines::_catch_exception_entry = generate_catch_exception();
+
+ // Build this early so it's available for the interpreter.
+ StubRoutines::_throw_StackOverflowError_entry =
+ generate_throw_exception("StackOverflowError throw_exception",
+ CAST_FROM_FN_PTR(address,
+ SharedRuntime::throw_StackOverflowError));
+ StubRoutines::_throw_delayed_StackOverflowError_entry =
+ generate_throw_exception("delayed StackOverflowError throw_exception",
+ CAST_FROM_FN_PTR(address,
+ SharedRuntime::throw_delayed_StackOverflowError));
+ // Safefetch stubs.
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
+ &StubRoutines::_safefetch32_fault_pc,
+ &StubRoutines::_safefetch32_continuation_pc);
+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
+ &StubRoutines::_safefetchN_fault_pc,
+ &StubRoutines::_safefetchN_continuation_pc);
+ }
+
+ void generate_all() {
+ // support for verify_oop (must happen after universe_init)
+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
+ StubRoutines::_throw_AbstractMethodError_entry =
+ generate_throw_exception("AbstractMethodError throw_exception",
+ CAST_FROM_FN_PTR(address,
+ SharedRuntime::
+ throw_AbstractMethodError));
+
+ StubRoutines::_throw_IncompatibleClassChangeError_entry =
+ generate_throw_exception("IncompatibleClassChangeError throw_exception",
+ CAST_FROM_FN_PTR(address,
+ SharedRuntime::
+ throw_IncompatibleClassChangeError));
+
+ StubRoutines::_throw_NullPointerException_at_call_entry =
+ generate_throw_exception("NullPointerException at call throw_exception",
+ CAST_FROM_FN_PTR(address,
+ SharedRuntime::
+ throw_NullPointerException_at_call));
+ // arraycopy stubs used by compilers
+ generate_arraycopy_stubs();
+
+#ifdef COMPILER2
+ if (UseMulAddIntrinsic) {
+ StubRoutines::_mulAdd = generate_mulAdd();
+ }
+
+ if (UseMultiplyToLenIntrinsic) {
+ StubRoutines::_multiplyToLen = generate_multiplyToLen();
+ }
+
+ if (UseSquareToLenIntrinsic) {
+ StubRoutines::_squareToLen = generate_squareToLen();
+ }
+
+ if (UseMontgomeryMultiplyIntrinsic) {
+ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
+ StubRoutines::_montgomeryMultiply = g.generate_multiply();
+ }
+
+ if (UseMontgomerySquareIntrinsic) {
+ StubCodeMark mark(this, "StubRoutines", "montgomerySquare");
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
+ StubRoutines::_montgomerySquare = g.generate_square();
+ }
+
+ if (UseRVVForBigIntegerShiftIntrinsics) {
+ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
+ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
+ }
+#endif
+
+ generate_compare_long_strings();
+
+ generate_string_indexof_stubs();
+
+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+ if (bs_nm != NULL) {
+ StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier();
+ }
+
+ StubRoutines::riscv::set_completed();
+ }
+
+ public:
+ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
+ if (all) {
+ generate_all();
+ } else {
+ generate_initial();
+ }
+ }
+
+ ~StubGenerator() {}
+}; // end class declaration
+
+#define UCM_TABLE_MAX_ENTRIES 8
+void StubGenerator_generate(CodeBuffer* code, bool all) {
+ if (UnsafeCopyMemory::_table == NULL) {
+ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+ }
+
+ StubGenerator g(code, all);
+}
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..395a2d338e4c32424fcfd0507c309695eeca6e3b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+address StubRoutines::riscv::_get_previous_sp_entry = NULL;
+
+address StubRoutines::riscv::_f2i_fixup = NULL;
+address StubRoutines::riscv::_f2l_fixup = NULL;
+address StubRoutines::riscv::_d2i_fixup = NULL;
+address StubRoutines::riscv::_d2l_fixup = NULL;
+address StubRoutines::riscv::_float_sign_mask = NULL;
+address StubRoutines::riscv::_float_sign_flip = NULL;
+address StubRoutines::riscv::_double_sign_mask = NULL;
+address StubRoutines::riscv::_double_sign_flip = NULL;
+address StubRoutines::riscv::_zero_blocks = NULL;
+address StubRoutines::riscv::_compare_long_string_LL = NULL;
+address StubRoutines::riscv::_compare_long_string_UU = NULL;
+address StubRoutines::riscv::_compare_long_string_LU = NULL;
+address StubRoutines::riscv::_compare_long_string_UL = NULL;
+address StubRoutines::riscv::_string_indexof_linear_ll = NULL;
+address StubRoutines::riscv::_string_indexof_linear_uu = NULL;
+address StubRoutines::riscv::_string_indexof_linear_ul = NULL;
+address StubRoutines::riscv::_large_byte_array_inflate = NULL;
+address StubRoutines::riscv::_method_entry_barrier = NULL;
+
+bool StubRoutines::riscv::_completed = false;
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..51f07819c33a125232b94b05692fed727b10e068
--- /dev/null
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP
+#define CPU_RISCV_STUBROUTINES_RISCV_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to
+// extend it.
+
+static bool returns_to_call_stub(address return_pc) {
+ return return_pc == _call_stub_return_address;
+}
+
+enum platform_dependent_constants {
+ code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
+ code_size2 = 28000 // simply increase if too small (assembler will crash if too small)
+};
+
+class riscv {
+ friend class StubGenerator;
+
+ private:
+ static address _get_previous_sp_entry;
+
+ static address _f2i_fixup;
+ static address _f2l_fixup;
+ static address _d2i_fixup;
+ static address _d2l_fixup;
+
+ static address _float_sign_mask;
+ static address _float_sign_flip;
+ static address _double_sign_mask;
+ static address _double_sign_flip;
+
+ static address _zero_blocks;
+
+ static address _compare_long_string_LL;
+ static address _compare_long_string_LU;
+ static address _compare_long_string_UL;
+ static address _compare_long_string_UU;
+ static address _string_indexof_linear_ll;
+ static address _string_indexof_linear_uu;
+ static address _string_indexof_linear_ul;
+ static address _large_byte_array_inflate;
+
+ static address _method_entry_barrier;
+
+ static bool _completed;
+
+ public:
+
+ static address get_previous_sp_entry() {
+ return _get_previous_sp_entry;
+ }
+
+ static address f2i_fixup() {
+ return _f2i_fixup;
+ }
+
+ static address f2l_fixup() {
+ return _f2l_fixup;
+ }
+
+ static address d2i_fixup() {
+ return _d2i_fixup;
+ }
+
+ static address d2l_fixup() {
+ return _d2l_fixup;
+ }
+
+ static address float_sign_mask() {
+ return _float_sign_mask;
+ }
+
+ static address float_sign_flip() {
+ return _float_sign_flip;
+ }
+
+ static address double_sign_mask() {
+ return _double_sign_mask;
+ }
+
+ static address double_sign_flip() {
+ return _double_sign_flip;
+ }
+
+ static address zero_blocks() {
+ return _zero_blocks;
+ }
+
+ static address compare_long_string_LL() {
+ return _compare_long_string_LL;
+ }
+
+ static address compare_long_string_LU() {
+ return _compare_long_string_LU;
+ }
+
+ static address compare_long_string_UL() {
+ return _compare_long_string_UL;
+ }
+
+ static address compare_long_string_UU() {
+ return _compare_long_string_UU;
+ }
+
+ static address string_indexof_linear_ul() {
+ return _string_indexof_linear_ul;
+ }
+
+ static address string_indexof_linear_ll() {
+ return _string_indexof_linear_ll;
+ }
+
+ static address string_indexof_linear_uu() {
+ return _string_indexof_linear_uu;
+ }
+
+ static address large_byte_array_inflate() {
+ return _large_byte_array_inflate;
+ }
+
+ static address method_entry_barrier() {
+ return _method_entry_barrier;
+ }
+
+ static bool complete() {
+ return _completed;
+ }
+
+ static void set_completed() {
+ _completed = true;
+ }
+};
+
+#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6537b2dbd94137f8a889d1ad728fa185adf01f4b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
@@ -0,0 +1,1794 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/bytecodeTracer.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/method.hpp"
+#include "oops/methodData.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/jniHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/powerOfTwo.hpp"
+#include <sys/types.h>
+
+#ifndef PRODUCT
+#include "oops/method.hpp"
+#endif // !PRODUCT
+
+// Size of interpreter code. Increase if too small. Interpreter will
+// fail with a guarantee ("not enough space for interpreter generation")
+// if too small.
+// Run with +PrintInterpreter to get the VM to print out the size.
+// Max size with JVMTI
+int TemplateInterpreter::InterpreterCodeSize = 256 * 1024;
+
+#define __ _masm->
+
+//-----------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_slow_signature_handler() {
+ address entry = __ pc();
+
+ __ andi(esp, esp, -16);
+ __ mv(c_rarg3, esp);
+ // xmethod
+ // xlocals
+ // c_rarg3: first stack arg - wordSize
+ // adjust sp
+
+ __ addi(sp, c_rarg3, -18 * wordSize);
+ __ addi(sp, sp, -2 * wordSize);
+ __ sd(ra, Address(sp, 0));
+
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::slow_signature_handler),
+ xmethod, xlocals, c_rarg3);
+
+ // x10: result handler
+
+ // Stack layout:
+ // sp: return address <- sp
+ // 1 garbage
+ // 8 integer args (if static first is unused)
+ // 1 float/double identifiers
+ // 8 double args
+ // stack args <- esp
+ // garbage
+ // expression stack bottom
+ // bcp (NULL)
+ // ...
+
+ // Restore ra
+ __ ld(ra, Address(sp, 0));
+  __ addi(sp, sp, 2 * wordSize);
+
+ // Do FP first so we can use c_rarg3 as temp
+ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers
+
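+  // Illustrative sketch (not generated code): bit i of the identifier word says
+  // whether FP argument register i holds a double (bit set) or a float:
+  //   for (i = 0; i < n_float_register_parameters_c; i++)
+  //     fpargs[i] = (identifiers & (1 << i)) ? load_double(slot[10 + i])
+  //                                          : load_float(slot[10 + i]);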
+ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
+ const FloatRegister r = g_FPArgReg[i];
+ Label d, done;
+
+ __ andi(t0, c_rarg3, 1UL << i);
+ __ bnez(t0, d);
+ __ flw(r, Address(sp, (10 + i) * wordSize));
+ __ j(done);
+ __ bind(d);
+ __ fld(r, Address(sp, (10 + i) * wordSize));
+ __ bind(done);
+ }
+
+ // c_rarg0 contains the result from the call of
+ // InterpreterRuntime::slow_signature_handler so we don't touch it
+ // here. It will be loaded with the JNIEnv* later.
+ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) {
+ const Register rm = g_INTArgReg[i];
+ __ ld(rm, Address(sp, i * wordSize));
+ }
+
+ __ addi(sp, sp, 18 * wordSize);
+ __ ret();
+
+ return entry;
+}
+
+// Various method entries
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+ // xmethod: Method*
+ // x30: sender sp
+ // esp: args
+
+ if (!InlineIntrinsics) {
+ return NULL; // Generate a vanilla entry
+ }
+
+ // These don't need a safepoint check because they aren't virtually
+ // callable. We won't enter these intrinsics from compiled code.
+ // If in the future we added an intrinsic which was virtually callable
+ // we'd have to worry about how to safepoint so that this code is used.
+
+ // mathematical functions inlined by compiler
+ // (interpreter must provide identical implementation
+ // in order to avoid monotonicity bugs when switching
+ // from interpreter to compiler in the middle of some
+ // computation)
+ //
+ // stack:
+ // [ arg ] <-- esp
+ // [ arg ]
+ // retaddr in ra
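+  //
+  // Each kind follows the same shape (illustrative sketch, not the emitted code):
+  // load the argument(s) from the expression stack into f10/f11, restore the
+  // caller's SP from x30, then either compute the result inline (abs, sqrt, fma)
+  // or call the corresponding stub/runtime routine, e.g.
+  //   fn = (StubRoutines::dsin() != NULL) ? StubRoutines::dsin()
+  //                                       : CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+  // and finally return through 'continuation' (ra, or x9 when the call clobbers ra).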
+
+ address fn = NULL;
+ address entry_point = NULL;
+ Register continuation = ra;
+ switch (kind) {
+ case Interpreter::java_lang_math_abs:
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ fabs_d(f10, f10);
+ __ mv(sp, x30); // Restore caller's SP
+ break;
+ case Interpreter::java_lang_math_sqrt:
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ fsqrt_d(f10, f10);
+ __ mv(sp, x30);
+ break;
+ case Interpreter::java_lang_math_sin :
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ mv(sp, x30);
+ __ mv(x9, ra);
+ continuation = x9; // The first callee-saved register
+ if (StubRoutines::dsin() == NULL) {
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+ } else {
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin());
+ }
+ __ mv(t0, fn);
+ __ jalr(t0);
+ break;
+ case Interpreter::java_lang_math_cos :
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ mv(sp, x30);
+ __ mv(x9, ra);
+ continuation = x9; // The first callee-saved register
+ if (StubRoutines::dcos() == NULL) {
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+ } else {
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos());
+ }
+ __ mv(t0, fn);
+ __ jalr(t0);
+ break;
+ case Interpreter::java_lang_math_tan :
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ mv(sp, x30);
+ __ mv(x9, ra);
+ continuation = x9; // The first callee-saved register
+ if (StubRoutines::dtan() == NULL) {
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+ } else {
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan());
+ }
+ __ mv(t0, fn);
+ __ jalr(t0);
+ break;
+ case Interpreter::java_lang_math_log :
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ mv(sp, x30);
+ __ mv(x9, ra);
+ continuation = x9; // The first callee-saved register
+ if (StubRoutines::dlog() == NULL) {
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+ } else {
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog());
+ }
+ __ mv(t0, fn);
+ __ jalr(t0);
+ break;
+ case Interpreter::java_lang_math_log10 :
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ mv(sp, x30);
+ __ mv(x9, ra);
+ continuation = x9; // The first callee-saved register
+ if (StubRoutines::dlog10() == NULL) {
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+ } else {
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10());
+ }
+ __ mv(t0, fn);
+ __ jalr(t0);
+ break;
+ case Interpreter::java_lang_math_exp :
+ entry_point = __ pc();
+ __ fld(f10, Address(esp));
+ __ mv(sp, x30);
+ __ mv(x9, ra);
+ continuation = x9; // The first callee-saved register
+ if (StubRoutines::dexp() == NULL) {
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+ } else {
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp());
+ }
+ __ mv(t0, fn);
+ __ jalr(t0);
+ break;
+ case Interpreter::java_lang_math_pow :
+ entry_point = __ pc();
+ __ mv(x9, ra);
+ continuation = x9;
+ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize));
+ __ fld(f11, Address(esp));
+ __ mv(sp, x30);
+ if (StubRoutines::dpow() == NULL) {
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+ } else {
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow());
+ }
+ __ mv(t0, fn);
+ __ jalr(t0);
+ break;
+ case Interpreter::java_lang_math_fmaD :
+ if (UseFMA) {
+ entry_point = __ pc();
+ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize));
+ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize));
+ __ fld(f12, Address(esp));
+ __ fmadd_d(f10, f10, f11, f12);
+ __ mv(sp, x30); // Restore caller's SP
+ }
+ break;
+ case Interpreter::java_lang_math_fmaF :
+ if (UseFMA) {
+ entry_point = __ pc();
+ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize));
+ __ flw(f11, Address(esp, Interpreter::stackElementSize));
+ __ flw(f12, Address(esp));
+ __ fmadd_s(f10, f10, f11, f12);
+ __ mv(sp, x30); // Restore caller's SP
+ }
+ break;
+ default:
+ ;
+ }
+ if (entry_point != NULL) {
+ __ jr(continuation);
+ }
+
+ return entry_point;
+}
+
+// Abstract method entry
+// Attempt to execute abstract method. Throw exception
+address TemplateInterpreterGenerator::generate_abstract_entry(void) {
+ // xmethod: Method*
+ // x30: sender SP
+
+ address entry_point = __ pc();
+
+ // abstract method entry
+
+ // pop return address, reset last_sp to NULL
+ __ empty_expression_stack();
+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed)
+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
+
+ // throw exception
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_AbstractMethodErrorWithMethod),
+ xmethod);
+ // the call_VM checks for exception, so we should never return here.
+ __ should_not_reach_here();
+
+ return entry_point;
+}
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
+ address entry = __ pc();
+
+#ifdef ASSERT
+ {
+ Label L;
+ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+ __ mv(t1, sp);
+ // maximal sp for current fp (stack grows negative)
+ // check if frame is complete
+ __ bge(t0, t1, L);
+ __ stop ("interpreter frame not set up");
+ __ bind(L);
+ }
+#endif // ASSERT
+ // Restore bcp under the assumption that the current frame is still
+ // interpreted
+ __ restore_bcp();
+
+ // expression stack must be empty before entering the VM if an
+ // exception happened
+ __ empty_expression_stack();
+ // throw exception
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() {
+ address entry = __ pc();
+ // expression stack must be empty before entering the VM if an
+ // exception happened
+ __ empty_expression_stack();
+ // setup parameters
+
+ // convention: expect aberrant index in register x11
+ __ zero_extend(c_rarg2, x11, 32);
+ // convention: expect array in register x13
+ __ mv(c_rarg1, x13);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::
+ throw_ArrayIndexOutOfBoundsException),
+ c_rarg1, c_rarg2);
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
+ address entry = __ pc();
+
+ // object is at TOS
+ __ pop_reg(c_rarg1);
+
+ // expression stack must be empty before entering the VM if an
+ // exception happened
+ __ empty_expression_stack();
+
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::
+ throw_ClassCastException),
+ c_rarg1);
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_exception_handler_common(
+ const char* name, const char* message, bool pass_oop) {
+ assert(!pass_oop || message == NULL, "either oop or message but not both");
+ address entry = __ pc();
+ if (pass_oop) {
+ // object is at TOS
+ __ pop_reg(c_rarg2);
+ }
+ // expression stack must be empty before entering the VM if an
+ // exception happened
+ __ empty_expression_stack();
+ // setup parameters
+ __ la(c_rarg1, Address((address)name));
+ if (pass_oop) {
+ __ call_VM(x10, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::
+ create_klass_exception),
+ c_rarg1, c_rarg2);
+ } else {
+    // It's somewhat awkward that ExternalAddress can't take NULL, because
+    // external_word_Relocation will assert.
+ if (message != NULL) {
+ __ la(c_rarg2, Address((address)message));
+ } else {
+ __ mv(c_rarg2, NULL_WORD);
+ }
+ __ call_VM(x10,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
+ c_rarg1, c_rarg2);
+ }
+ // throw exception
+ __ j(address(Interpreter::throw_exception_entry()));
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
+ address entry = __ pc();
+
+ // Restore stack bottom in case i2c adjusted stack
+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ // and NULL it as marker that esp is now tos until next java call
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_constant_pool_cache();
+ __ get_method(xmethod);
+
+ if (state == atos) {
+ Register obj = x10;
+ Register mdp = x11;
+ Register tmp = x12;
+ __ ld(mdp, Address(xmethod, Method::method_data_offset()));
+ __ profile_return_type(mdp, obj, tmp);
+ }
+
+ // Pop N words from the stack
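+  // i.e. (sketch): esp += parameter_size(cache entry at bcp) * wordSize;
+  // the shadd below scales by 8 (shift of 3), the stack element size.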
+ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size);
+ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
+ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask);
+
+ __ shadd(esp, x11, esp, t0, 3);
+
+ // Restore machine SP
+ __ ld(t0, Address(xmethod, Method::const_offset()));
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2);
+ __ ld(t1,
+ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+ __ slli(t0, t0, 3);
+ __ sub(t0, t1, t0);
+ __ andi(sp, t0, -16);
+
+ __ check_and_handle_popframe(xthread);
+ __ check_and_handle_earlyret(xthread);
+
+ __ get_dispatch();
+ __ dispatch_next(state, step);
+
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state,
+ int step,
+ address continuation) {
+ address entry = __ pc();
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_constant_pool_cache();
+ __ get_method(xmethod);
+ __ get_dispatch();
+
+ // Calculate stack limit
+ __ ld(t0, Address(xmethod, Method::const_offset()));
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2);
+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+ __ slli(t0, t0, 3);
+ __ sub(t0, t1, t0);
+ __ andi(sp, t0, -16);
+
+ // Restore expression stack pointer
+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ // NULL last_sp until next java call
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+
+ // handle exceptions
+ {
+ Label L;
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+ __ beqz(t0, L);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
+ __ should_not_reach_here();
+ __ bind(L);
+ }
+
+ if (continuation == NULL) {
+ __ dispatch_next(state, step);
+ } else {
+ __ jump_to_entry(continuation);
+ }
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) {
+ address entry = __ pc();
+ if (type == T_OBJECT) {
+ // retrieve result from frame
+ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
+ // and verify it
+ __ verify_oop(x10);
+ } else {
+ __ cast_primitive_type(type, x10);
+ }
+
+ __ ret(); // return from result handler
+ return entry;
+}
+
+address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state,
+ address runtime_entry) {
+ assert_cond(runtime_entry != NULL);
+ address entry = __ pc();
+ __ push(state);
+ __ call_VM(noreg, runtime_entry);
+ __ fence(0xf, 0xf);
+ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
+ return entry;
+}
+
+// Helpers for commoning out cases in the various types of method entries.
+//
+
+
+// increment invocation count & check for overflow
+//
+// Note: checking for negative value instead of overflow
+// so we have a 'sticky' overflow test
+//
+// xmethod: method
+//
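+// Conceptually (illustrative sketch, not the emitted code):
+//   counter = (ProfileInterpreter && mdo != NULL) ? &mdo->invocation_counter
+//                                                 : &method_counters->invocation_counter;
+//   *counter += InvocationCounter::count_increment;
+//   if ((*counter & invoke_mask) == 0) goto *overflow;  // sticky: mask keeps the high bits
+//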
+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) {
+ Label done;
+  // Note: In tiered we increment either the counters in the Method* or in the MDO,
+  // depending on whether we're profiling or not.
+ int increment = InvocationCounter::count_increment;
+ Label no_mdo;
+ if (ProfileInterpreter) {
+ // Are we profiling?
+ __ ld(x10, Address(xmethod, Method::method_data_offset()));
+ __ beqz(x10, no_mdo);
+ // Increment counter in the MDO
+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) +
+ in_bytes(InvocationCounter::counter_offset()));
+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset()));
+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow);
+ __ j(done);
+ }
+ __ bind(no_mdo);
+ // Increment counter in MethodCounters
+ const Address invocation_counter(t1,
+ MethodCounters::invocation_counter_offset() +
+ InvocationCounter::counter_offset());
+ __ get_method_counters(xmethod, t1, done);
+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset()));
+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow);
+ __ bind(done);
+}
+
+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {
+ __ mv(c_rarg1, zr);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1);
+ __ j(do_continue);
+}
+
+// See if we've got enough room on the stack for locals plus overhead
+// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError
+// without going through the signal handler, i.e., reserved and yellow zones
+// will not be made usable. The shadow zone must suffice to handle the
+// overflow.
+// The expression stack grows down incrementally, so the normal guard
+// page mechanism will work for that.
+//
+// NOTE: The additional locals are also always pushed (this wasn't
+// obvious in generate_method_entry), so the guard should work for them
+// too.
+//
+// Args:
+// x13: number of additional locals this frame needs (what we must check)
+// xmethod: Method*
+//
+// Kills:
+// x10
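+//
+// Conceptually (illustrative sketch):
+//   if (x13 > (page_size - overhead_size) / Interpreter::stackElementSize) {
+//     limit = thread->stack_overflow_limit() + overhead_size
+//             + x13 * Interpreter::stackElementSize;
+//     if (sp <= limit) { sp = sender_sp & -16; goto throw_StackOverflowError; }
+//   }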
+void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
+
+ // monitor entry size: see picture of stack set
+ // (generate_method_entry) and frame_amd64.hpp
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+ // total overhead size: entry_size + (saved fp through expr stack
+ // bottom). be sure to change this if you add/subtract anything
+ // to/from the overhead area
+ const int overhead_size =
+ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size;
+
+ const int page_size = os::vm_page_size();
+
+ Label after_frame_check;
+
+ // see if the frame is greater than one page in size. If so,
+ // then we need to verify there is enough stack space remaining
+ // for the additional locals.
+ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize);
+ __ bleu(x13, t0, after_frame_check);
+
+ // compute sp as if this were going to be the last frame on
+ // the stack before the red zone
+
+ // locals + overhead, in bytes
+ __ mv(x10, overhead_size);
+ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter.
+
+ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset());
+ __ ld(t0, stack_limit);
+
+#ifdef ASSERT
+ Label limit_okay;
+ // Verify that thread stack limit is non-zero.
+ __ bnez(t0, limit_okay);
+ __ stop("stack overflow limit is zero");
+ __ bind(limit_okay);
+#endif
+
+ // Add stack limit to locals.
+ __ add(x10, x10, t0);
+
+ // Check against the current stack bottom.
+ __ bgtu(sp, x10, after_frame_check);
+
+ // Remove the incoming args, peeling the machine SP back to where it
+ // was in the caller. This is not strictly necessary, but unless we
+ // do so the stack frame may have a garbage FP; this ensures a
+ // correct call stack that we can always unwind. The ANDI should be
+ // unnecessary because the sender SP in x30 is always aligned, but
+ // it doesn't hurt.
+ __ andi(sp, x30, -16);
+
+ // Note: the restored frame is not necessarily interpreted.
+ // Use the shared runtime version of the StackOverflowError.
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
+
+ // all done with frame size check
+ __ bind(after_frame_check);
+}
+
+// Allocate monitor and lock method (asm interpreter)
+//
+// Args:
+// xmethod: Method*
+// xlocals: locals
+//
+// Kills:
+// x10
+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs)
+// t0, t1 (temporary regs)
+void TemplateInterpreterGenerator::lock_method() {
+ // synchronize method
+ const Address access_flags(xmethod, Method::access_flags_offset());
+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+#ifdef ASSERT
+ __ lwu(x10, access_flags);
+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false);
+#endif // ASSERT
+
+ // get synchronization object
+ {
+ Label done;
+ __ lwu(x10, access_flags);
+ __ andi(t0, x10, JVM_ACC_STATIC);
+ // get receiver (assume this is frequent case)
+ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0)));
+ __ beqz(t0, done);
+ __ load_mirror(x10, xmethod);
+
+#ifdef ASSERT
+ {
+ Label L;
+ __ bnez(x10, L);
+ __ stop("synchronization object is NULL");
+ __ bind(L);
+ }
+#endif // ASSERT
+
+ __ bind(done);
+ }
+
+ // add space for monitor & lock
+ __ add(sp, sp, - entry_size); // add space for a monitor entry
+ __ add(esp, esp, - entry_size);
+ __ mv(t0, esp);
+ __ sd(t0, monitor_block_top); // set new monitor block top
+ // store object
+ __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes()));
+ __ mv(c_rarg1, esp); // object address
+ __ lock_object(c_rarg1);
+}
+
+// Generate a fixed interpreter frame. This is identical setup for
+// interpreted methods and for native methods hence the shared code.
+//
+// Args:
+// ra: return address
+// xmethod: Method*
+// xlocals: pointer to locals
+// xcpool: cp cache
+// stack_pointer: previous sp
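+//
+// Frame layout built below, roughly (slot index relative to the new sp):
+//   [11] ra          [10] saved fp    [9] sender sp (x30)  [8] last_sp (NULL)
+//   [ 7] Method*     [ 6] mdp or 0    [5] 0                [4] mirror
+//   [ 3] cp cache    [ 2] locals      [1] bcp              [0] esp
+// For native calls two extra zeroed slots ([13], [12]) are added.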
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+ // initialize fixed part of activation frame
+ if (native_call) {
+ __ add(esp, sp, - 14 * wordSize);
+ __ mv(xbcp, zr);
+ __ add(sp, sp, - 14 * wordSize);
+ // add 2 zero-initialized slots for native calls
+ __ sd(zr, Address(sp, 13 * wordSize));
+ __ sd(zr, Address(sp, 12 * wordSize));
+ } else {
+ __ add(esp, sp, - 12 * wordSize);
+ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod
+ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase
+ __ add(sp, sp, - 12 * wordSize);
+ }
+ __ sd(xbcp, Address(sp, wordSize));
+ __ sd(esp, Address(sp, 0));
+
+ if (ProfileInterpreter) {
+ Label method_data_continue;
+ __ ld(t0, Address(xmethod, Method::method_data_offset()));
+ __ beqz(t0, method_data_continue);
+ __ la(t0, Address(t0, in_bytes(MethodData::data_offset())));
+ __ bind(method_data_continue);
+ }
+
+ __ sd(xmethod, Address(sp, 7 * wordSize));
+ __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize));
+
+ // Get mirror and store it in the frame as GC root for this Method*
+ __ load_mirror(t2, xmethod);
+ __ sd(zr, Address(sp, 5 * wordSize));
+ __ sd(t2, Address(sp, 4 * wordSize));
+
+ __ ld(xcpool, Address(xmethod, Method::const_offset()));
+ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset()));
+ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes()));
+ __ sd(xcpool, Address(sp, 3 * wordSize));
+ __ sd(xlocals, Address(sp, 2 * wordSize));
+
+ __ sd(ra, Address(sp, 11 * wordSize));
+ __ sd(fp, Address(sp, 10 * wordSize));
+ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp
+
+ // set sender sp
+ // leave last_sp as null
+ __ sd(x30, Address(sp, 9 * wordSize));
+ __ sd(zr, Address(sp, 8 * wordSize));
+
+ // Move SP out of the way
+ if (!native_call) {
+ __ ld(t0, Address(xmethod, Method::const_offset()));
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2);
+ __ slli(t0, t0, 3);
+ __ sub(t0, sp, t0);
+ __ andi(sp, t0, -16);
+ }
+}
+
+// End of helpers
+
+// Various method entries
+//------------------------------------------------------------------------------------------------------------------------
+//
+//
+
+// Method entry for java.lang.ref.Reference.get.
+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
+ // Code: _aload_0, _getfield, _areturn
+ // parameter size = 1
+ //
+ // The code that gets generated by this routine is split into 2 parts:
+ // 1. The "intrinsified" code for G1 (or any SATB based GC),
+ // 2. The slow path - which is an expansion of the regular method entry.
+ //
+ // Notes:-
+ // * In the G1 code we do not check whether we need to block for
+ // a safepoint. If G1 is enabled then we must execute the specialized
+ // code for Reference.get (except when the Reference object is null)
+ // so that we can log the value in the referent field with an SATB
+ // update buffer.
+ // If the code for the getfield template is modified so that the
+ // G1 pre-barrier code is executed when the current method is
+ // Reference.get() then going through the normal method entry
+ // will be fine.
+ // * The G1 code can, however, check the receiver object (the instance
+ // of java.lang.Reference) and jump to the slow path if null. If the
+ // Reference object is null then we obviously cannot fetch the referent
+ // and so we don't need to call the G1 pre-barrier. Thus we can use the
+ // regular method entry code to generate the NPE.
+ //
+ // This code is based on generate_accessor_entry.
+ //
+ // xmethod: Method*
+ // x30: senderSP must preserve for slow path, set SP to it on fast path
+
+ // ra is live. It must be saved around calls.
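+  //
+  // Fast path, roughly (sketch): if (receiver == NULL) goto slow_path;
+  // load receiver.referent through the GC barrier (ON_WEAK_OOP_REF) into x10,
+  // restore SP from the saved senderSP, and return.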
+
+ address entry = __ pc();
+
+ const int referent_offset = java_lang_ref_Reference::referent_offset();
+ guarantee(referent_offset > 0, "referent offset not initialized");
+
+ Label slow_path;
+ const Register local_0 = c_rarg0;
+ // Check if local 0 != NULL
+ // If the receiver is null then it is OK to jump to the slow path.
+ __ ld(local_0, Address(esp, 0));
+ __ beqz(local_0, slow_path);
+
+ __ mv(x9, x30); // Move senderSP to a callee-saved register
+
+ // Load the value of the referent field.
+ const Address field_address(local_0, referent_offset);
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0);
+
+ // areturn
+ __ andi(sp, x9, -16); // done with stack
+ __ ret();
+
+ // generate a vanilla interpreter entry as the slow path
+ __ bind(slow_path);
+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+ return entry;
+}
+
+/**
+ * Method entry for static native methods:
+ * int java.util.zip.CRC32.update(int crc, int b)
+ */
+address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
+ // TODO: Unimplemented generate_CRC32_update_entry
+ return 0;
+}
+
+/**
+ * Method entry for static native methods:
+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+ // TODO: Unimplemented generate_CRC32_updateBytes_entry
+ return 0;
+}
+
+/**
+ * Method entry for intrinsic-candidate (non-native) methods:
+ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
+ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
+ * Unlike CRC32, CRC32C does not have any methods marked as native
+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses
+ */
+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+ // TODO: Unimplemented generate_CRC32C_updateBytes_entry
+ return 0;
+}
+
+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
+ // See more discussion in stackOverflow.hpp.
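+  // Conceptually (sketch): if sp is already above the growth watermark nothing
+  // needs to be done; otherwise touch one word in every shadow page below sp
+  // and, if sp is still above the safe limit, record it as the new watermark:
+  //   if (sp > watermark) return;
+  //   for (p = 1; p <= n_shadow_pages; p++) touch(sp - p * page_size);
+  //   if (sp > safe_limit) watermark = sp;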
+
+  const int shadow_zone_size = checked_cast<int>(StackOverflow::stack_shadow_zone_size());
+ const int page_size = os::vm_page_size();
+ const int n_shadow_pages = shadow_zone_size / page_size;
+
+#ifdef ASSERT
+ Label L_good_limit;
+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit()));
+ __ bnez(t0, L_good_limit);
+ __ stop("shadow zone safe limit is not initialized");
+ __ bind(L_good_limit);
+
+ Label L_good_watermark;
+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark()));
+ __ bnez(t0, L_good_watermark);
+ __ stop("shadow zone growth watermark is not initialized");
+ __ bind(L_good_watermark);
+#endif
+
+ Label L_done;
+
+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark()));
+ __ bgtu(sp, t0, L_done);
+
+ for (int p = 1; p <= n_shadow_pages; p++) {
+ __ bang_stack_with_offset(p * page_size);
+ }
+
+ // Record the new watermark, but only if the update is above the safe limit.
+ // Otherwise, the next time around the check above would pass the safe limit.
+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit()));
+ __ bleu(sp, t0, L_done);
+ __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark()));
+
+ __ bind(L_done);
+}
+
+// Interpreter stub for calling a native method. (asm interpreter)
+// This sets up a somewhat different looking stack for calling the
+// native method than the typical interpreter frame setup.
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
+ // determine code generation flags
+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+ // x11: Method*
+ // x30: sender sp
+
+ address entry_point = __ pc();
+
+ const Address constMethod (xmethod, Method::const_offset());
+ const Address access_flags (xmethod, Method::access_flags_offset());
+ const Address size_of_parameters(x12, ConstMethod::
+ size_of_parameters_offset());
+
+ // get parameter size (always needed)
+ __ ld(x12, constMethod);
+ __ load_unsigned_short(x12, size_of_parameters);
+
+ // Native calls don't need the stack size check since they have no
+ // expression stack and the arguments are already on the stack and
+ // we only add a handful of words to the stack.
+
+ // xmethod: Method*
+ // x12: size of parameters
+ // x30: sender sp
+
+ // for natives the size of locals is zero
+
+ // compute beginning of parameters (xlocals)
+ __ shadd(xlocals, x12, esp, xlocals, 3);
+ __ addi(xlocals, xlocals, -wordSize);
+
+ // Pull SP back to minimum size: this avoids holes in the stack
+ __ andi(sp, esp, -16);
+
+ // initialize fixed part of activation frame
+ generate_fixed_frame(true);
+
+ // make sure method is native & not abstract
+#ifdef ASSERT
+ __ lwu(x10, access_flags);
+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false);
+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter");
+#endif
+
+ // Since at this point in the method invocation the exception
+ // handler would try to exit the monitor of synchronized methods
+ // which hasn't been entered yet, we set the thread local variable
+ // _do_not_unlock_if_synchronized to true. The remove_activation
+ // will check this flag.
+
+ const Address do_not_unlock_if_synchronized(xthread,
+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+ __ mv(t1, true);
+ __ sb(t1, do_not_unlock_if_synchronized);
+
+ // increment invocation count & check for overflow
+ Label invocation_counter_overflow;
+ if (inc_counter) {
+ generate_counter_incr(&invocation_counter_overflow);
+ }
+
+ Label continue_after_compile;
+ __ bind(continue_after_compile);
+
+ bang_stack_shadow_pages(true);
+
+ // reset the _do_not_unlock_if_synchronized flag
+ __ sb(zr, do_not_unlock_if_synchronized);
+
+ // check for synchronized methods
+ // Must happen AFTER invocation_counter check and stack overflow check,
+ // so method is not locked if overflows.
+ if (synchronized) {
+ lock_method();
+ } else {
+ // no synchronization necessary
+#ifdef ASSERT
+ __ lwu(x10, access_flags);
+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization");
+#endif
+ }
+
+ // start execution
+#ifdef ASSERT
+ __ verify_frame_setup();
+#endif
+
+ // jvmti support
+ __ notify_method_entry();
+
+ // work registers
+ const Register t = x18;
+ const Register result_handler = x19;
+
+ // allocate space for parameters
+ __ ld(t, Address(xmethod, Method::const_offset()));
+ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset()));
+
+ __ slli(t, t, Interpreter::logStackElementSize);
+ __ sub(x30, esp, t);
+ __ andi(sp, x30, -16);
+ __ mv(esp, x30);
+
+ // get signature handler
+ {
+ Label L;
+ __ ld(t, Address(xmethod, Method::signature_handler_offset()));
+ __ bnez(t, L);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::prepare_native_call),
+ xmethod);
+ __ ld(t, Address(xmethod, Method::signature_handler_offset()));
+ __ bind(L);
+ }
+
+ // call signature handler
+ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals,
+ "adjust this code");
+ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp,
+ "adjust this code");
+ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0,
+ "adjust this code");
+
+ // The generated handlers do not touch xmethod (the method).
+ // However, large signatures cannot be cached and are generated
+ // each time here. The slow-path generator can do a GC on return,
+ // so we must reload it after the call.
+ __ jalr(t);
+ __ get_method(xmethod); // slow path can do a GC, reload xmethod
+
+
+ // result handler is in x10
+ // set result handler
+ __ mv(result_handler, x10);
+ // pass mirror handle if static call
+ {
+ Label L;
+ __ lwu(t, Address(xmethod, Method::access_flags_offset()));
+ __ andi(t0, t, JVM_ACC_STATIC);
+ __ beqz(t0, L);
+ // get mirror
+ __ load_mirror(t, xmethod);
+ // copy mirror into activation frame
+ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
+ // pass handle to mirror
+ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize);
+ __ bind(L);
+ }
+
+ // get native function entry point in x28
+ {
+ Label L;
+ __ ld(x28, Address(xmethod, Method::native_function_offset()));
+ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
+ __ mv(t1, unsatisfied);
+ __ ld(t1, t1);
+ __ bne(x28, t1, L);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::prepare_native_call),
+ xmethod);
+ __ get_method(xmethod);
+ __ ld(x28, Address(xmethod, Method::native_function_offset()));
+ __ bind(L);
+ }
+
+ // pass JNIEnv
+ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset()));
+
+ // It is enough that the pc() points into the right code
+ // segment. It does not have to be the correct return pc.
+ Label native_return;
+ __ set_last_Java_frame(esp, fp, native_return, x30);
+
+ // change thread state
+#ifdef ASSERT
+ {
+ Label L;
+ __ lwu(t, Address(xthread, JavaThread::thread_state_offset()));
+ __ addi(t0, zr, (u1)_thread_in_Java);
+ __ beq(t, t0, L);
+ __ stop("Wrong thread state in native stub");
+ __ bind(L);
+ }
+#endif
+
+ // Change state to native
+ __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
+ __ mv(t0, _thread_in_native);
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ __ sw(t0, Address(t1));
+
+ // Call the native method.
+ __ jalr(x28);
+ __ bind(native_return);
+ __ get_method(xmethod);
+ // result potentially in x10 or f10
+
+ // make room for the pushes we're about to do
+ __ sub(t0, esp, 4 * wordSize);
+ __ andi(sp, t0, -16);
+
+ // NOTE: The order of these pushes is known to frame::interpreter_frame_result
+ // in order to extract the result of a method call. If the order of these
+ // pushes change or anything else is added to the stack then the code in
+ // interpreter_frame_result must also change.
+ __ push(dtos);
+ __ push(ltos);
+
+ // change thread state
+ // Force all preceding writes to be observed prior to thread state change
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+
+ __ mv(t0, _thread_in_native_trans);
+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
+
+ // Force this write out before the read below
+ __ membar(MacroAssembler::AnyAny);
+
+ // check for safepoint operation in progress and/or pending suspend requests
+ {
+ Label L, Continue;
+
+ // We need an acquire here to ensure that any subsequent load of the
+ // global SafepointSynchronize::_state flag is ordered after this load
+ // of the thread-local polling word. We don't want this poll to
+ // return false (i.e. not safepointing) and a later poll of the global
+ // SafepointSynchronize::_state spuriously to return true.
+ //
+ // This is to avoid a race when we're in a native->Java transition
+ // racing the code which wakes up from a safepoint.
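+    //
+    // In effect (sketch): if (safepoint poll fires || thread->suspend_flags != 0)
+    //   JavaThread::check_special_condition_for_native_trans(thread);
+    // called by hand rather than via call_VM (see the note below).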
+ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */);
+ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset()));
+ __ beqz(t1, Continue);
+ __ bind(L);
+
+ // Don't use call_VM as it will see a possible pending exception
+ // and forward it and never return here preventing us from
+ // clearing _last_native_pc down below. So we do a runtime call by
+ // hand.
+ //
+ __ mv(c_rarg0, xthread);
+ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
+ __ jalr(t1);
+ __ get_method(xmethod);
+ __ reinit_heapbase();
+ __ bind(Continue);
+ }
+
+ // change thread state
+ // Force all preceding writes to be observed prior to thread state change
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+
+ __ mv(t0, _thread_in_Java);
+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
+
+ // reset_last_Java_frame
+ __ reset_last_Java_frame(true);
+
+ if (CheckJNICalls) {
+ // clear_pending_jni_exception_check
+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset()));
+ }
+
+ // reset handle block
+ __ ld(t, Address(xthread, JavaThread::active_handles_offset()));
+ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes()));
+
+ // If result is an oop unbox and store it in frame where gc will see it
+ // and result handler will pick it up
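+  //
+  // Roughly (sketch): if (result_handler == result_handler(T_OBJECT)) {
+  //   x10 = JNIHandles::resolve(x10); oop_temp_slot = x10;  // visible to GC
+  //   push x10 back to keep the expected stack depth; }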
+
+ {
+ Label no_oop;
+ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
+ __ bne(t, result_handler, no_oop);
+ // Unbox oop result, e.g. JNIHandles::resolve result.
+ __ pop(ltos);
+ __ resolve_jobject(x10, xthread, t);
+ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
+ // keep stack depth as expected by pushing oop which will eventually be discarded
+ __ push(ltos);
+ __ bind(no_oop);
+ }
+
+ {
+ Label no_reguard;
+ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset())));
+ __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled);
+ __ bne(t0, t1, no_reguard);
+
+ __ pusha(); // only save smashed registers
+ __ mv(c_rarg0, xthread);
+ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
+ __ jalr(t1);
+ __ popa(); // only restore smashed registers
+ __ bind(no_reguard);
+ }
+
+ // The method register is junk from after the thread_in_native transition
+ // until here. Also can't call_VM until the bcp has been
+ // restored. Need bcp for throwing exception below so get it now.
+ __ get_method(xmethod);
+
+ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=>
+ // xbcp == code_base()
+ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod*
+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase
+ // handle exceptions (exception handling will handle unlocking!)
+ {
+ Label L;
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+ __ beqz(t0, L);
+ // Note: At some point we may want to unify this with the code
+ // used in call_VM_base(); i.e., we should use the
+ // StubRoutines::forward_exception code. For now this doesn't work
+ // here because the sp is not correctly set at this point.
+ __ MacroAssembler::call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_pending_exception));
+ __ should_not_reach_here();
+ __ bind(L);
+ }
+
+ // do unlocking if necessary
+ {
+ Label L;
+ __ lwu(t, Address(xmethod, Method::access_flags_offset()));
+ __ andi(t0, t, JVM_ACC_SYNCHRONIZED);
+ __ beqz(t0, L);
+ // the code below should be shared with interpreter macro
+ // assembler implementation
+ {
+ Label unlock;
+ // BasicObjectLock will be first in list, since this is a
+ // synchronized method. However, need to check that the object
+ // has not been unlocked by an explicit monitorexit bytecode.
+
+ // monitor expect in c_rarg1 for slow unlock path
+ __ la(c_rarg1, Address(fp, // address of first monitor
+ (intptr_t)(frame::interpreter_frame_initial_sp_offset *
+ wordSize - sizeof(BasicObjectLock))));
+
+ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+ __ bnez(t, unlock);
+
+ // Entry already unlocked, need to throw exception
+ __ MacroAssembler::call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_illegal_monitor_state_exception));
+ __ should_not_reach_here();
+
+ __ bind(unlock);
+ __ unlock_object(c_rarg1);
+ }
+ __ bind(L);
+ }
+
+ // jvmti support
+ // Note: This must happen _after_ handling/throwing any exceptions since
+ // the exception handler code notifies the runtime of method exits
+ // too. If this happens before, method entry/exit notifications are
+ // not properly paired (was bug - gri 11/22/99).
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);
+
+ __ pop(ltos);
+ __ pop(dtos);
+
+ __ jalr(result_handler);
+
+ // remove activation
+ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
+ // remove frame anchor
+ __ leave();
+
+ // restore sender sp
+ __ mv(sp, esp);
+
+ __ ret();
+
+ if (inc_counter) {
+ // Handle overflow of counter and compile method
+ __ bind(invocation_counter_overflow);
+ generate_counter_overflow(continue_after_compile);
+ }
+
+ return entry_point;
+}
+
+//
+// Generic interpreted method entry to (asm) interpreter
+//
+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
+
+ // determine code generation flags
+ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+ // t0: sender sp
+ address entry_point = __ pc();
+
+ const Address constMethod(xmethod, Method::const_offset());
+ const Address access_flags(xmethod, Method::access_flags_offset());
+ const Address size_of_parameters(x13,
+ ConstMethod::size_of_parameters_offset());
+ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset());
+
+ // get parameter size (always needed)
+ // need to load the const method first
+ __ ld(x13, constMethod);
+ __ load_unsigned_short(x12, size_of_parameters);
+
+ // x12: size of parameters
+
+ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words
+ __ sub(x13, x13, x12); // x13 = no. of additional locals
+
+ // see if we've got enough room on the stack for locals plus overhead.
+ generate_stack_overflow_check();
+
+ // compute beginning of parameters (xlocals)
+ __ shadd(xlocals, x12, esp, t1, 3);
+ __ add(xlocals, xlocals, -wordSize);
+
+ // Make room for additional locals
+ __ slli(t1, x13, 3);
+ __ sub(t0, esp, t1);
+
+ // Padding between locals and fixed part of activation frame to ensure
+ // SP is always 16-byte aligned.
+ __ andi(sp, t0, -16);
+
+ // x13 - # of additional locals
+ // allocate space for locals
+ // explicitly initialize locals
+ {
+ Label exit, loop;
+ __ blez(x13, exit); // do nothing if x13 <= 0
+ __ bind(loop);
+ __ sd(zr, Address(t0));
+ __ add(t0, t0, wordSize);
+ __ add(x13, x13, -1); // until everything initialized
+ __ bnez(x13, loop);
+ __ bind(exit);
+ }
+
+ // And the base dispatch table
+ __ get_dispatch();
+
+ // initialize fixed part of activation frame
+ generate_fixed_frame(false);
+
+ // make sure method is not native & not abstract
+#ifdef ASSERT
+ __ lwu(x10, access_flags);
+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native");
+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter");
+#endif
+
+ // Since at this point in the method invocation the exception
+ // handler would try to exit the monitor of synchronized methods
+ // which hasn't been entered yet, we set the thread local variable
+ // _do_not_unlock_if_synchronized to true. The remove_activation
+ // will check this flag.
+
+ const Address do_not_unlock_if_synchronized(xthread,
+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+ __ mv(t1, true);
+ __ sb(t1, do_not_unlock_if_synchronized);
+
+ Label no_mdp;
+ const Register mdp = x13;
+ __ ld(mdp, Address(xmethod, Method::method_data_offset()));
+ __ beqz(mdp, no_mdp);
+ __ add(mdp, mdp, in_bytes(MethodData::data_offset()));
+ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers
+ __ bind(no_mdp);
+
+ // increment invocation count & check for overflow
+ Label invocation_counter_overflow;
+ if (inc_counter) {
+ generate_counter_incr(&invocation_counter_overflow);
+ }
+
+ Label continue_after_compile;
+ __ bind(continue_after_compile);
+
+ bang_stack_shadow_pages(false);
+
+ // reset the _do_not_unlock_if_synchronized flag
+ __ sb(zr, do_not_unlock_if_synchronized);
+
+ // check for synchronized methods
+ // Must happen AFTER invocation_counter check and stack overflow check,
+ // so method is not locked if overflows.
+ if (synchronized) {
+ // Allocate monitor and lock method
+ lock_method();
+ } else {
+ // no synchronization necessary
+#ifdef ASSERT
+ __ lwu(x10, access_flags);
+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization");
+#endif
+ }
+
+ // start execution
+#ifdef ASSERT
+ __ verify_frame_setup();
+#endif
+
+ // jvmti support
+ __ notify_method_entry();
+
+ __ dispatch_next(vtos);
+
+ // invocation counter overflow
+ if (inc_counter) {
+ // Handle overflow of counter and compile method
+ __ bind(invocation_counter_overflow);
+ generate_counter_overflow(continue_after_compile);
+ }
+
+ return entry_point;
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateInterpreterGenerator::generate_throw_exception() {
+ // Entry point in previous activation (i.e., if the caller was
+ // interpreted)
+ Interpreter::_rethrow_exception_entry = __ pc();
+ // Restore sp to interpreter_frame_last_sp even though we are going
+ // to empty the expression stack for the exception processing.
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ // x10: exception
+ // x13: return address/pc that threw exception
+ __ restore_bcp(); // xbcp points to call/send
+ __ restore_locals();
+ __ restore_constant_pool_cache();
+ __ reinit_heapbase(); // restore xheapbase as heapbase.
+ __ get_dispatch();
+
+ // Entry point for exceptions thrown within interpreter code
+ Interpreter::_throw_exception_entry = __ pc();
+ // If we came here via a NullPointerException on the receiver of a
+ // method, xthread may be corrupt.
+ __ get_method(xmethod);
+ // expression stack is undefined here
+ // x10: exception
+ // xbcp: exception bcp
+ __ verify_oop(x10);
+ __ mv(c_rarg1, x10);
+
+ // expression stack must be empty before entering the VM in case of
+ // an exception
+ __ empty_expression_stack();
+ // find exception handler address and preserve exception oop
+ __ call_VM(x13,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::exception_handler_for_exception),
+ c_rarg1);
+
+ // Calculate stack limit
+ __ ld(t0, Address(xmethod, Method::const_offset()));
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4);
+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+ __ slli(t0, t0, 3);
+ __ sub(t0, t1, t0);
+ __ andi(sp, t0, -16);
+
+ // x10: exception handler entry point
+ // x13: preserved exception oop
+ // xbcp: bcp for exception handler
+ __ push_ptr(x13); // push exception which is now the only value on the stack
+ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!)
+
+ // If the exception is not handled in the current frame the frame is
+ // removed and the exception is rethrown (i.e. exception
+ // continuation is _rethrow_exception).
+ //
+  // Note: At this point the bci still refers to the instruction
+  // which caused the exception, and the expression stack is
+ // empty. Thus, for any VM calls at this point, GC will find a legal
+ // oop map (with empty expression stack).
+
+ //
+ // JVMTI PopFrame support
+ //
+
+ Interpreter::_remove_activation_preserving_args_entry = __ pc();
+ __ empty_expression_stack();
+ // Set the popframe_processing bit in pending_popframe_condition
+ // indicating that we are currently handling popframe, so that
+ // call_VMs that may happen later do not trigger new popframe
+ // handling cycles.
+ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset()));
+ __ ori(x13, x13, JavaThread::popframe_processing_bit);
+ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset()));
+
+ {
+ // Check to see whether we are returning to a deoptimized frame.
+ // (The PopFrame call ensures that the caller of the popped frame is
+ // either interpreted or compiled and deoptimizes it if compiled.)
+ // In this case, we can't call dispatch_next() after the frame is
+ // popped, but instead must save the incoming arguments and restore
+ // them after deoptimization has occurred.
+ //
+ // Note that we don't compare the return PC against the
+ // deoptimization blob's unpack entry because of the presence of
+ // adapter frames in C2.
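+    //
+    // In effect (sketch): if (!Interpreter::contains(return_pc)) {
+    //   Deoptimization::popframe_preserve_args(thread, size_of_args, locals);
+    //   remove_activation(); popframe_condition = popframe_force_deopt_reexecution_bit;
+    //   return to the deoptimization handler; }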
+ Label caller_not_deoptimized;
+ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize));
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1);
+ __ bnez(x10, caller_not_deoptimized);
+
+ // Compute size of arguments for saving when returning to
+ // deoptimized caller
+ __ get_method(x10);
+ __ ld(x10, Address(x10, Method::const_offset()));
+ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod::
+ size_of_parameters_offset())));
+ __ slli(x10, x10, Interpreter::logStackElementSize);
+ __ restore_locals();
+ __ sub(xlocals, xlocals, x10);
+ __ add(xlocals, xlocals, wordSize);
+ // Save these arguments
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+ Deoptimization::
+ popframe_preserve_args),
+ xthread, x10, xlocals);
+
+ __ remove_activation(vtos,
+ /* throw_monitor_exception */ false,
+ /* install_monitor_exception */ false,
+ /* notify_jvmdi */ false);
+
+ // Inform deoptimization that it is responsible for restoring
+ // these arguments
+ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit);
+ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset()));
+
+ // Continue in deoptimization handler
+ __ ret();
+
+ __ bind(caller_not_deoptimized);
+ }
+
+ __ remove_activation(vtos,
+ /* throw_monitor_exception */ false,
+ /* install_monitor_exception */ false,
+ /* notify_jvmdi */ false);
+
+ // Restore the last_sp and null it out
+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+
+ __ restore_bcp();
+ __ restore_locals();
+ __ restore_constant_pool_cache();
+ __ get_method(xmethod);
+ __ get_dispatch();
+
+ // The method data pointer was incremented already during
+ // call profiling. We have to restore the mdp for the current bcp.
+ if (ProfileInterpreter) {
+ __ set_method_data_pointer_for_bcp();
+ }
+
+ // Clear the popframe condition flag
+ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset()));
+ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive");
+
+#if INCLUDE_JVMTI
+ {
+ Label L_done;
+
+ __ lbu(t0, Address(xbcp, 0));
+ __ li(t1, Bytecodes::_invokestatic);
+ __ bne(t1, t0, L_done);
+
+ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
+ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
+
+ __ ld(c_rarg0, Address(xlocals, 0));
+ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), c_rarg0, xmethod, xbcp);
+
+ __ beqz(x10, L_done);
+
+ __ sd(x10, Address(esp, 0));
+ __ bind(L_done);
+ }
+#endif // INCLUDE_JVMTI
+
+ // Restore machine SP
+ __ ld(t0, Address(xmethod, Method::const_offset()));
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4);
+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+ __ slliw(t0, t0, 3);
+ __ sub(t0, t1, t0);
+ __ andi(sp, t0, -16);
+
+ __ dispatch_next(vtos);
+ // end of PopFrame support
+
+ Interpreter::_remove_activation_entry = __ pc();
+
+ // preserve exception over this code sequence
+ __ pop_ptr(x10);
+ __ sd(x10, Address(xthread, JavaThread::vm_result_offset()));
+ // remove the activation (without doing throws on illegalMonitorExceptions)
+ __ remove_activation(vtos, false, true, false);
+ // restore exception
+ __ get_vm_result(x10, xthread);
+
+ // In between activations - previous activation type unknown yet
+ // compute continuation point - the continuation point expects the
+ // following registers set up:
+ //
+ // x10: exception
+ // ra: return address/pc that threw exception
+ // sp: expression stack of caller
+ // fp: fp of caller
+ // FIXME: There's no point saving ra here because VM calls don't trash it
+ __ sub(sp, sp, 2 * wordSize);
+ __ sd(x10, Address(sp, 0)); // save exception
+ __ sd(ra, Address(sp, wordSize)); // save return address
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+ SharedRuntime::exception_handler_for_return_address),
+ xthread, ra);
+ __ mv(x11, x10); // save exception handler
+ __ ld(x10, Address(sp, 0)); // restore exception
+ __ ld(ra, Address(sp, wordSize)); // restore return address
+ __ add(sp, sp, 2 * wordSize);
+ // We might be returning to a deopt handler that expects x13 to
+ // contain the exception pc
+ __ mv(x13, ra);
+ // Note that an "issuing PC" is actually the next PC after the call
+ __ jr(x11); // jump to exception
+ // handler of caller
+}
+
+//
+// JVMTI ForceEarlyReturn support
+//
+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
+ address entry = __ pc();
+
+ __ restore_bcp();
+ __ restore_locals();
+ __ empty_expression_stack();
+ __ load_earlyret_value(state);
+
+ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset());
+
+ // Clear the earlyret state
+ assert(JvmtiThreadState::earlyret_inactive == 0, "should be");
+ __ sd(zr, cond_addr);
+
+ __ remove_activation(state,
+ false, /* throw_monitor_exception */
+ false, /* install_monitor_exception */
+ true); /* notify_jvmdi */
+ __ ret();
+
+ return entry;
+}
+// end of ForceEarlyReturn support
+
+//-----------------------------------------------------------------------------
+// Helper for vtos entry point generation
+
+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
+ address& bep,
+ address& cep,
+ address& sep,
+ address& aep,
+ address& iep,
+ address& lep,
+ address& fep,
+ address& dep,
+ address& vep) {
+ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template");
+ Label L;
+ aep = __ pc(); __ push_ptr(); __ j(L);
+ fep = __ pc(); __ push_f(); __ j(L);
+ dep = __ pc(); __ push_d(); __ j(L);
+ lep = __ pc(); __ push_l(); __ j(L);
+ bep = cep = sep =
+ iep = __ pc(); __ push_i();
+ vep = __ pc();
+ __ bind(L);
+ generate_and_dispatch(t);
+}
+
+//-----------------------------------------------------------------------------
+
+// Non-product code
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
+ address entry = __ pc();
+
+ __ push_reg(ra);
+ __ push(state);
+ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp);
+ __ mv(c_rarg2, x10); // Pass itos
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3);
+ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp);
+ __ pop(state);
+ __ pop_reg(ra);
+ __ ret(); // return from result handler
+
+ return entry;
+}
+
+void TemplateInterpreterGenerator::count_bytecode() {
+ __ push_reg(t0);
+ __ push_reg(x10);
+ __ mv(x10, (address) &BytecodeCounter::_counter_value);
+ __ li(t0, 1);
+ __ amoadd_d(zr, x10, t0, Assembler::aqrl);
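+ // amoadd.d with rd == zr bumps the counter atomically and discards the old value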
+ __ pop_reg(x10);
+ __ pop_reg(t0);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; }
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; }
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+ // Call a little run-time stub to avoid blow-up for each bytecode.
+ // The run-time stub saves the right registers, depending on
+ // the tosca in-state for the given template.
+
+ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated");
+ __ jal(Interpreter::trace_code(t->tos_in()));
+ __ reinit_heapbase();
+}
+
+void TemplateInterpreterGenerator::stop_interpreter_at() {
+ Label L;
+ __ push_reg(t0);
+ __ mv(t0, (address) &BytecodeCounter::_counter_value);
+ __ ld(t0, Address(t0));
+ __ mv(t1, StopInterpreterAt);
+ __ bne(t0, t1, L);
+ __ ebreak();
+ __ bind(L);
+ __ pop_reg(t0);
+}
+
+#endif // !PRODUCT
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d2a301c6e740cffe7257fbb97ae375894c829986
--- /dev/null
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
@@ -0,0 +1,3951 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/tlab_globals.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/universe.hpp"
+#include "oops/method.hpp"
+#include "oops/methodData.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "utilities/powerOfTwo.hpp"
+
+#define __ _masm->
+
+// Address computation: local variables
+
+static inline Address iaddress(int n) {
+ return Address(xlocals, Interpreter::local_offset_in_bytes(n));
+}
+
+static inline Address laddress(int n) {
+ return iaddress(n + 1);
+}
+
+static inline Address faddress(int n) {
+ return iaddress(n);
+}
+
+static inline Address daddress(int n) {
+ return laddress(n);
+}
+
+static inline Address aaddress(int n) {
+ return iaddress(n);
+}
+
+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
+ assert_cond(_masm != NULL);
+ _masm->shadd(temp, r, xlocals, temp, 3);
+ return Address(temp, 0);
+}
+
+static inline Address laddress(Register r, Register temp,
+ InterpreterMacroAssembler* _masm) {
+ assert_cond(_masm != NULL);
+ _masm->shadd(temp, r, xlocals, temp, 3);
+ return Address(temp, Interpreter::local_offset_in_bytes(1));
+}
+
+static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
+ return iaddress(r, temp, _masm);
+}
+
+static inline Address daddress(Register r, Register temp,
+ InterpreterMacroAssembler* _masm) {
+ return laddress(r, temp, _masm);
+}
+
+static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
+ return iaddress(r, temp, _masm);
+}
+
+static inline Address at_rsp() {
+ return Address(esp, 0);
+}
+
+ // At the top of the Java expression stack, which may be different from esp(). It
+ // isn't different for category 1 values.
+static inline Address at_tos () {
+ return Address(esp, Interpreter::expr_offset_in_bytes(0));
+}
+
+static inline Address at_tos_p1() {
+ return Address(esp, Interpreter::expr_offset_in_bytes(1));
+}
+
+static inline Address at_tos_p2() {
+ return Address(esp, Interpreter::expr_offset_in_bytes(2));
+}
+
+static inline Address at_tos_p3() {
+ return Address(esp, Interpreter::expr_offset_in_bytes(3));
+}
+
+static inline Address at_tos_p4() {
+ return Address(esp, Interpreter::expr_offset_in_bytes(4));
+}
+
+static inline Address at_tos_p5() {
+ return Address(esp, Interpreter::expr_offset_in_bytes(5));
+}
+
+ // Miscellaneous helper routines
+ // Store an oop (or NULL) at the Address dst.
+ // If val == noreg, a NULL is stored.
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+ Address dst,
+ Register val,
+ DecoratorSet decorators) {
+ assert(val == noreg || val == x10, "parameter is just for looks");
+ assert_cond(_masm != NULL);
+ __ store_heap_oop(dst, val, x29, x11, decorators);
+}
+
+static void do_oop_load(InterpreterMacroAssembler* _masm,
+ Address src,
+ Register dst,
+ DecoratorSet decorators) {
+ assert_cond(_masm != NULL);
+ __ load_heap_oop(dst, src, x7, x11, decorators);
+}
+
+Address TemplateTable::at_bcp(int offset) {
+ assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
+ return Address(xbcp, offset);
+}
+
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
+ Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
+ int byte_no)
+{
+ if (!RewriteBytecodes) { return; }
+ Label L_patch_done;
+
+ switch (bc) {
+ case Bytecodes::_fast_aputfield: // fall through
+ case Bytecodes::_fast_bputfield: // fall through
+ case Bytecodes::_fast_zputfield: // fall through
+ case Bytecodes::_fast_cputfield: // fall through
+ case Bytecodes::_fast_dputfield: // fall through
+ case Bytecodes::_fast_fputfield: // fall through
+ case Bytecodes::_fast_iputfield: // fall through
+ case Bytecodes::_fast_lputfield: // fall through
+ case Bytecodes::_fast_sputfield: {
+ // We skip bytecode quickening for putfield instructions when
+ // the put_code written to the constant pool cache is zero.
+ // This is required so that every execution of this instruction
+ // calls out to InterpreterRuntime::resolve_get_put to do
+ // additional, required work.
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+ assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
+ __ mv(bc_reg, bc);
+ __ beqz(temp_reg, L_patch_done);
+ break;
+ }
+ default:
+ assert(byte_no == -1, "sanity");
+ // the pair bytecodes have already done the load.
+ if (load_bc_into_bc_reg) {
+ __ mv(bc_reg, bc);
+ }
+ }
+
+ if (JvmtiExport::can_post_breakpoint()) {
+ Label L_fast_patch;
+ // if a breakpoint is present we can't rewrite the stream directly
+ __ load_unsigned_byte(temp_reg, at_bcp(0));
+ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register.
+ __ bnez(temp_reg, L_fast_patch);
+ // Let breakpoint table handling rewrite to quicker bytecode
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg);
+ __ j(L_patch_done);
+ __ bind(L_fast_patch);
+ }
+
+#ifdef ASSERT
+ Label L_okay;
+ __ load_unsigned_byte(temp_reg, at_bcp(0));
+ __ beq(temp_reg, bc_reg, L_okay);
+ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc));
+ __ beqz(temp_reg, L_okay);
+ __ stop("patching the wrong bytecode");
+ __ bind(L_okay);
+#endif
+
+ // patch bytecode
+ __ sb(bc_reg, at_bcp(0));
+ __ bind(L_patch_done);
+}
+
+// Individual instructions
+
+void TemplateTable::nop() {
+ transition(vtos, vtos);
+ // nothing to do
+}
+
+void TemplateTable::shouldnotreachhere() {
+ transition(vtos, vtos);
+ __ stop("should not reach here bytecode");
+}
+
+void TemplateTable::aconst_null()
+{
+ transition(vtos, atos);
+ __ mv(x10, zr);
+}
+
+void TemplateTable::iconst(int value)
+{
+ transition(vtos, itos);
+ __ li(x10, value);
+}
+
+void TemplateTable::lconst(int value)
+{
+ transition(vtos, ltos);
+ __ li(x10, value);
+}
+
+void TemplateTable::fconst(int value)
+{
+ transition(vtos, ftos);
+ static float fBuf[2] = {1.0, 2.0};
+ __ mv(t0, (intptr_t)fBuf);
+ switch (value) {
+ case 0:
+ __ fmv_w_x(f10, zr);
+ break;
+ case 1:
+ __ flw(f10, t0, 0);
+ break;
+ case 2:
+ __ flw(f10, t0, sizeof(float));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void TemplateTable::dconst(int value)
+{
+ transition(vtos, dtos);
+ static double dBuf[2] = {1.0, 2.0};
+ __ mv(t0, (intptr_t)dBuf);
+ switch (value) {
+ case 0:
+ __ fmv_d_x(f10, zr);
+ break;
+ case 1:
+ __ fld(f10, t0, 0);
+ break;
+ case 2:
+ __ fld(f10, t0, sizeof(double));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void TemplateTable::bipush()
+{
+ transition(vtos, itos);
+ __ load_signed_byte(x10, at_bcp(1));
+}
+
+void TemplateTable::sipush()
+{
+ transition(vtos, itos);
+ __ load_unsigned_short(x10, at_bcp(1));
+ __ revb_w_w(x10, x10);
+ __ sraiw(x10, x10, 16);
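+ // The 16-bit operand at bcp+1 is big-endian; after the byte reversal it sits in the
+ // upper half of the 32-bit word, so the arithmetic shift both moves it down and sign-extends it.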
+}
+
+void TemplateTable::ldc(bool wide)
+{
+ transition(vtos, vtos);
+ Label call_ldc, notFloat, notClass, notInt, Done;
+
+ if (wide) {
+ __ get_unsigned_2_byte_index_at_bcp(x11, 1);
+ } else {
+ __ load_unsigned_byte(x11, at_bcp(1));
+ }
+ __ get_cpool_and_tags(x12, x10);
+
+ const int base_offset = ConstantPool::header_size() * wordSize;
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
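+ // Constant pool slot i lives at cpool + base_offset + i * wordSize; its tag byte is at tags + tags_offset + i.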
+
+ // get type
+ __ addi(x13, x11, tags_offset);
+ __ add(x13, x10, x13);
+ __ membar(MacroAssembler::AnyAny);
+ __ lbu(x13, Address(x13, 0));
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
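+ // The barriers around the tag load give it acquire semantics, so a resolved constant
+ // pool entry is never observed before its published tag.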
+
+ // unresolved class - get the resolved class
+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass);
+ __ beq(x13, t1, call_ldc);
+
+ // unresolved class in error state - call into runtime to throw the error
+ // from the first resolution attempt
+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError);
+ __ beq(x13, t1, call_ldc);
+
+ // resolved class - need to call vm to get java mirror of the class
+ __ mv(t1, (u1)JVM_CONSTANT_Class);
+ __ bne(x13, t1, notClass);
+
+ __ bind(call_ldc);
+ __ mv(c_rarg1, wide);
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
+ __ push_ptr(x10);
+ __ verify_oop(x10);
+ __ j(Done);
+
+ __ bind(notClass);
+ __ mv(t1, (u1)JVM_CONSTANT_Float);
+ __ bne(x13, t1, notFloat);
+
+ // ftos
+ __ shadd(x11, x11, x12, x11, 3);
+ __ flw(f10, Address(x11, base_offset));
+ __ push_f(f10);
+ __ j(Done);
+
+ __ bind(notFloat);
+
+ __ mv(t1, (u1)JVM_CONSTANT_Integer);
+ __ bne(x13, t1, notInt);
+
+ // itos
+ __ shadd(x11, x11, x12, x11, 3);
+ __ lw(x10, Address(x11, base_offset));
+ __ push_i(x10);
+ __ j(Done);
+
+ __ bind(notInt);
+ condy_helper(Done);
+
+ __ bind(Done);
+}
+
+// Fast path for caching oop constants.
+void TemplateTable::fast_aldc(bool wide)
+{
+ transition(vtos, atos);
+
+ const Register result = x10;
+ const Register tmp = x11;
+ const Register rarg = x12;
+
+ const int index_size = wide ? sizeof(u2) : sizeof(u1);
+
+ Label resolved;
+
+ // We are resolved if the resolved reference cache entry contains a
+ // non-null object (String, MethodType, etc.)
+ assert_different_registers(result, tmp);
+ __ get_cache_index_at_bcp(tmp, 1, index_size);
+ __ load_resolved_reference_at_index(result, tmp);
+ __ bnez(result, resolved);
+
+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+
+ // first time invocation - must resolve first
+ __ mv(rarg, (int)bytecode());
+ __ call_VM(result, entry, rarg);
+
+ __ bind(resolved);
+
+ { // Check for the null sentinel.
+ // If we just called the VM, it already did the mapping for us,
+ // but it's harmless to retry.
+ Label notNull;
+
+ // Stash null_sentinel address to get its value later
+ int32_t offset = 0;
+ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset);
+ __ ld(tmp, Address(rarg, offset));
+ __ resolve_oop_handle(tmp);
+ __ bne(result, tmp, notNull);
+ __ mv(result, zr); // NULL object reference
+ __ bind(notNull);
+ }
+
+ if (VerifyOops) {
+ // Safe to call with 0 result
+ __ verify_oop(result);
+ }
+}
+
+void TemplateTable::ldc2_w()
+{
+ transition(vtos, vtos);
+ Label notDouble, notLong, Done;
+ __ get_unsigned_2_byte_index_at_bcp(x10, 1);
+
+ __ get_cpool_and_tags(x11, x12);
+ const int base_offset = ConstantPool::header_size() * wordSize;
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
+
+ // get type
+ __ add(x12, x12, x10);
+ __ load_unsigned_byte(x12, Address(x12, tags_offset));
+ __ mv(t1, JVM_CONSTANT_Double);
+ __ bne(x12, t1, notDouble);
+
+ // dtos
+ __ shadd(x12, x10, x11, x12, 3);
+ __ fld(f10, Address(x12, base_offset));
+ __ push_d(f10);
+ __ j(Done);
+
+ __ bind(notDouble);
+ __ mv(t1, (int)JVM_CONSTANT_Long);
+ __ bne(x12, t1, notLong);
+
+ // ltos
+ __ shadd(x10, x10, x11, x10, 3);
+ __ ld(x10, Address(x10, base_offset));
+ __ push_l(x10);
+ __ j(Done);
+
+ __ bind(notLong);
+ condy_helper(Done);
+ __ bind(Done);
+}
+
+void TemplateTable::condy_helper(Label& Done)
+{
+ const Register obj = x10;
+ const Register rarg = x11;
+ const Register flags = x12;
+ const Register off = x13;
+
+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+
+ __ mv(rarg, (int) bytecode());
+ __ call_VM(obj, entry, rarg);
+
+ __ get_vm_result_2(flags, xthread);
+
+ // VMr = obj = base address to find primitive value to push
+ // VMr2 = flags = (tos, off) using format of CPCE::_flags
+ __ mv(off, flags);
+ __ mv(t0, ConstantPoolCacheEntry::field_index_mask);
+ __ andrw(off, off, t0);
+
+ __ add(off, obj, off);
+ const Address field(off, 0); // off now holds obj + offset, i.e. the field address
+
+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits));
+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // extract the tos_state bits into flags[tos_state_bits-1:0]
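+ // In effect: flags = (flags >> tos_state_shift) & ((1 << tos_state_bits) - 1)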
+
+ switch (bytecode()) {
+ case Bytecodes::_ldc: // fall through
+ case Bytecodes::_ldc_w: {
+ // tos in (itos, ftos, stos, btos, ctos, ztos)
+ Label notInt, notFloat, notShort, notByte, notChar, notBool;
+ __ mv(t1, itos);
+ __ bne(flags, t1, notInt);
+ // itos
+ __ lw(x10, field);
+ __ push(itos);
+ __ j(Done);
+
+ __ bind(notInt);
+ __ mv(t1, ftos);
+ __ bne(flags, t1, notFloat);
+ // ftos
+ __ load_float(field);
+ __ push(ftos);
+ __ j(Done);
+
+ __ bind(notFloat);
+ __ mv(t1, stos);
+ __ bne(flags, t1, notShort);
+ // stos
+ __ load_signed_short(x10, field);
+ __ push(stos);
+ __ j(Done);
+
+ __ bind(notShort);
+ __ mv(t1, btos);
+ __ bne(flags, t1, notByte);
+ // btos
+ __ load_signed_byte(x10, field);
+ __ push(btos);
+ __ j(Done);
+
+ __ bind(notByte);
+ __ mv(t1, ctos);
+ __ bne(flags, t1, notChar);
+ // ctos
+ __ load_unsigned_short(x10, field);
+ __ push(ctos);
+ __ j(Done);
+
+ __ bind(notChar);
+ __ mv(t1, ztos);
+ __ bne(flags, t1, notBool);
+ // ztos
+ __ load_signed_byte(x10, field);
+ __ push(ztos);
+ __ j(Done);
+
+ __ bind(notBool);
+ break;
+ }
+
+ case Bytecodes::_ldc2_w: {
+ Label notLong, notDouble;
+ __ mv(t1, ltos);
+ __ bne(flags, t1, notLong);
+ // ltos
+ __ ld(x10, field);
+ __ push(ltos);
+ __ j(Done);
+
+ __ bind(notLong);
+ __ mv(t1, dtos);
+ __ bne(flags, t1, notDouble);
+ // dtos
+ __ load_double(field);
+ __ push(dtos);
+ __ j(Done);
+
+ __ bind(notDouble);
+ break;
+ }
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ __ stop("bad ldc/condy");
+}
+
+void TemplateTable::locals_index(Register reg, int offset)
+{
+ __ lbu(reg, at_bcp(offset));
+ __ neg(reg, reg);
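+ // Locals grow toward lower addresses, so the index is negated here and later scaled
+ // by wordSize in iaddress(): local #n is read from xlocals - n * wordSize.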
+}
+
+void TemplateTable::iload() {
+ iload_internal();
+}
+
+void TemplateTable::nofast_iload() {
+ iload_internal(may_not_rewrite);
+}
+
+void TemplateTable::iload_internal(RewriteControl rc) {
+ transition(vtos, itos);
+ if (RewriteFrequentPairs && rc == may_rewrite) {
+ Label rewrite, done;
+ const Register bc = x14;
+
+ // get next bytecode
+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
+
+ // if _iload, wait to rewrite to iload2. We only want to rewrite the
+ // last two iloads in a pair. Comparing against fast_iload means that
+ // the next bytecode is neither an iload nor a caload, and therefore
+ // an iload pair.
+ __ mv(t1, Bytecodes::_iload);
+ __ beq(x11, t1, done);
+
+ // if _fast_iload rewrite to _fast_iload2
+ __ mv(t1, Bytecodes::_fast_iload);
+ __ mv(bc, Bytecodes::_fast_iload2);
+ __ beq(x11, t1, rewrite);
+
+ // if _caload rewrite to _fast_icaload
+ __ mv(t1, Bytecodes::_caload);
+ __ mv(bc, Bytecodes::_fast_icaload);
+ __ beq(x11, t1, rewrite);
+
+ // else rewrite to _fast_iload
+ __ mv(bc, Bytecodes::_fast_iload);
+
+ // rewrite
+ // bc: new bytecode
+ __ bind(rewrite);
+ patch_bytecode(Bytecodes::_iload, bc, x11, false);
+ __ bind(done);
+
+ }
+
+ // do iload, get the local value into tos
+ locals_index(x11);
+ __ lw(x10, iaddress(x11, x10, _masm));
+}
+
+void TemplateTable::fast_iload2()
+{
+ transition(vtos, itos);
+ locals_index(x11);
+ __ lw(x10, iaddress(x11, x10, _masm));
+ __ push(itos);
+ locals_index(x11, 3);
+ __ lw(x10, iaddress(x11, x10, _masm));
+}
+
+void TemplateTable::fast_iload()
+{
+ transition(vtos, itos);
+ locals_index(x11);
+ __ lw(x10, iaddress(x11, x10, _masm));
+}
+
+void TemplateTable::lload()
+{
+ transition(vtos, ltos);
+ __ lbu(x11, at_bcp(1));
+ __ slli(x11, x11, LogBytesPerWord);
+ __ sub(x11, xlocals, x11);
+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::fload()
+{
+ transition(vtos, ftos);
+ locals_index(x11);
+ __ flw(f10, faddress(x11, t0, _masm));
+}
+
+void TemplateTable::dload()
+{
+ transition(vtos, dtos);
+ __ lbu(x11, at_bcp(1));
+ __ slli(x11, x11, LogBytesPerWord);
+ __ sub(x11, xlocals, x11);
+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::aload()
+{
+ transition(vtos, atos);
+ locals_index(x11);
+ __ ld(x10, iaddress(x11, x10, _masm));
+}
+
+void TemplateTable::locals_index_wide(Register reg) {
+ __ lhu(reg, at_bcp(2));
+ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend
+ __ neg(reg, reg);
+}
+
+void TemplateTable::wide_iload() {
+ transition(vtos, itos);
+ locals_index_wide(x11);
+ __ lw(x10, iaddress(x11, t0, _masm));
+}
+
+void TemplateTable::wide_lload()
+{
+ transition(vtos, ltos);
+ __ lhu(x11, at_bcp(2));
+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend
+ __ slli(x11, x11, LogBytesPerWord);
+ __ sub(x11, xlocals, x11);
+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::wide_fload()
+{
+ transition(vtos, ftos);
+ locals_index_wide(x11);
+ __ flw(f10, faddress(x11, t0, _masm));
+}
+
+void TemplateTable::wide_dload()
+{
+ transition(vtos, dtos);
+ __ lhu(x11, at_bcp(2));
+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend
+ __ slli(x11, x11, LogBytesPerWord);
+ __ sub(x11, xlocals, x11);
+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1)));
+}
+
+void TemplateTable::wide_aload()
+{
+ transition(vtos, atos);
+ locals_index_wide(x11);
+ __ ld(x10, aaddress(x11, t0, _masm));
+}
+
+void TemplateTable::index_check(Register array, Register index)
+{
+ // destroys x11, t0
+ // check array
+ __ null_check(array, arrayOopDesc::length_offset_in_bytes());
+ // sign extend index for use by indexed load
+ // check index
+ const Register length = t0;
+ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes()));
+ if (index != x11) {
+ assert(x11 != array, "different registers");
+ __ mv(x11, index);
+ }
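+ // In effect: if ((u4)index >= (u4)array->length()) branch to the ArrayIndexOutOfBoundsException entry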
+ Label ok;
+ __ addw(index, index, zr);
+ __ bltu(index, length, ok);
+ __ mv(x13, array);
+ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+ __ jr(t0);
+ __ bind(ok);
+}
+
+void TemplateTable::iaload()
+{
+ transition(itos, itos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2);
+ __ shadd(x10, x11, x10, t0, 2);
+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+ __ addw(x10, x10, zr); // sign-extend the loaded int
+}
+
+void TemplateTable::laload()
+{
+ transition(itos, ltos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3);
+ __ shadd(x10, x11, x10, t0, 3);
+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::faload()
+{
+ transition(itos, ftos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2);
+ __ shadd(x10, x11, x10, t0, 2);
+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::daload()
+{
+ transition(itos, dtos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3);
+ __ shadd(x10, x11, x10, t0, 3);
+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::aaload()
+{
+ transition(itos, atos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop);
+ do_oop_load(_masm,
+ Address(x10),
+ x10,
+ IS_ARRAY);
+}
+
+void TemplateTable::baload()
+{
+ transition(itos, itos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0);
+ __ shadd(x10, x11, x10, t0, 0);
+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::caload()
+{
+ transition(itos, itos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1);
+ __ shadd(x10, x11, x10, t0, 1);
+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+// iload followed by caload frequent pair
+void TemplateTable::fast_icaload()
+{
+ transition(vtos, itos);
+ // load index out of locals
+ locals_index(x12);
+ __ lw(x11, iaddress(x12, x11, _masm));
+ __ pop_ptr(x10);
+
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11, kills t0
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11
+ __ shadd(x10, x11, x10, t0, 1);
+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::saload()
+{
+ transition(itos, itos);
+ __ mv(x11, x10);
+ __ pop_ptr(x10);
+ // x10: array
+ // x11: index
+ index_check(x10, x11); // leaves index in x11, kills t0
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1);
+ __ shadd(x10, x11, x10, t0, 1);
+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::iload(int n)
+{
+ transition(vtos, itos);
+ __ lw(x10, iaddress(n));
+}
+
+void TemplateTable::lload(int n)
+{
+ transition(vtos, ltos);
+ __ ld(x10, laddress(n));
+}
+
+void TemplateTable::fload(int n)
+{
+ transition(vtos, ftos);
+ __ flw(f10, faddress(n));
+}
+
+void TemplateTable::dload(int n)
+{
+ transition(vtos, dtos);
+ __ fld(f10, daddress(n));
+}
+
+void TemplateTable::aload(int n)
+{
+ transition(vtos, atos);
+ __ ld(x10, iaddress(n));
+}
+
+void TemplateTable::aload_0() {
+ aload_0_internal();
+}
+
+void TemplateTable::nofast_aload_0() {
+ aload_0_internal(may_not_rewrite);
+}
+
+void TemplateTable::aload_0_internal(RewriteControl rc) {
+ // According to bytecode histograms, the pairs:
+ //
+ // _aload_0, _fast_igetfield
+ // _aload_0, _fast_agetfield
+ // _aload_0, _fast_fgetfield
+ //
+ // occur frequently. If RewriteFrequentPairs is set, the (slow)
+ // _aload_0 bytecode checks if the next bytecode is either
+ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
+ // rewrites the current bytecode into a pair bytecode; otherwise it
+ // rewrites the current bytecode into _fast_aload_0 that doesn't do
+ // the pair check anymore.
+ //
+ // Note: If the next bytecode is _getfield, the rewrite must be
+ // delayed, otherwise we may miss an opportunity for a pair.
+ //
+ // Also rewrite frequent pairs
+ // aload_0, aload_1
+ // aload_0, iload_1
+ // These bytecodes with a small amount of code are most profitable
+ // to rewrite
+ if (RewriteFrequentPairs && rc == may_rewrite) {
+ Label rewrite, done;
+ const Register bc = x14;
+
+ // get next bytecode
+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
+
+ // if _getfield then wait with rewrite
+ __ mv(t1, Bytecodes::_getfield);
+ __ beq(x11, t1, done);
+
+ // if _igetfield then rewrite to _fast_iaccess_0
+ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+ __ mv(t1, Bytecodes::_fast_igetfield);
+ __ mv(bc, Bytecodes::_fast_iaccess_0);
+ __ beq(x11, t1, rewrite);
+
+ // if _agetfield then rewrite to _fast_aaccess_0
+ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+ __ mv(t1, Bytecodes::_fast_agetfield);
+ __ mv(bc, Bytecodes::_fast_aaccess_0);
+ __ beq(x11, t1, rewrite);
+
+ // if _fgetfield then rewrite to _fast_faccess_0
+ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+ __ mv(t1, Bytecodes::_fast_fgetfield);
+ __ mv(bc, Bytecodes::_fast_faccess_0);
+ __ beq(x11, t1, rewrite);
+
+ // else rewrite to _fast_aload0
+ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
+ __ mv(bc, Bytecodes::_fast_aload_0);
+
+ // rewrite
+ // bc: new bytecode
+ __ bind(rewrite);
+ patch_bytecode(Bytecodes::_aload_0, bc, x11, false);
+
+ __ bind(done);
+ }
+
+ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
+ aload(0);
+}
+
+void TemplateTable::istore()
+{
+ transition(itos, vtos);
+ locals_index(x11);
+ __ sw(x10, iaddress(x11, t0, _masm));
+}
+
+void TemplateTable::lstore()
+{
+ transition(ltos, vtos);
+ locals_index(x11);
+ __ sd(x10, laddress(x11, t0, _masm));
+}
+
+void TemplateTable::fstore() {
+ transition(ftos, vtos);
+ locals_index(x11);
+ __ fsw(f10, iaddress(x11, t0, _masm));
+}
+
+void TemplateTable::dstore() {
+ transition(dtos, vtos);
+ locals_index(x11);
+ __ fsd(f10, daddress(x11, t0, _masm));
+}
+
+void TemplateTable::astore()
+{
+ transition(vtos, vtos);
+ __ pop_ptr(x10);
+ locals_index(x11);
+ __ sd(x10, aaddress(x11, t0, _masm));
+}
+
+void TemplateTable::wide_istore() {
+ transition(vtos, vtos);
+ __ pop_i();
+ locals_index_wide(x11);
+ __ sw(x10, iaddress(x11, t0, _masm));
+}
+
+void TemplateTable::wide_lstore() {
+ transition(vtos, vtos);
+ __ pop_l();
+ locals_index_wide(x11);
+ __ sd(x10, laddress(x11, t0, _masm));
+}
+
+void TemplateTable::wide_fstore() {
+ transition(vtos, vtos);
+ __ pop_f();
+ locals_index_wide(x11);
+ __ fsw(f10, faddress(x11, t0, _masm));
+}
+
+void TemplateTable::wide_dstore() {
+ transition(vtos, vtos);
+ __ pop_d();
+ locals_index_wide(x11);
+ __ fsd(f10, daddress(x11, t0, _masm));
+}
+
+void TemplateTable::wide_astore() {
+ transition(vtos, vtos);
+ __ pop_ptr(x10);
+ locals_index_wide(x11);
+ __ sd(x10, aaddress(x11, t0, _masm));
+}
+
+void TemplateTable::iastore() {
+ transition(itos, vtos);
+ __ pop_i(x11);
+ __ pop_ptr(x13);
+ // x10: value
+ // x11: index
+ // x13: array
+ index_check(x13, x11); // prefer index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2);
+ __ shadd(t0, x11, x13, t0, 2);
+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg);
+}
+
+void TemplateTable::lastore() {
+ transition(ltos, vtos);
+ __ pop_i(x11);
+ __ pop_ptr(x13);
+ // x10: value
+ // x11: index
+ // x13: array
+ index_check(x13, x11); // prefer index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3);
+ __ shadd(t0, x11, x13, t0, 3);
+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg);
+}
+
+void TemplateTable::fastore() {
+ transition(ftos, vtos);
+ __ pop_i(x11);
+ __ pop_ptr(x13);
+ // f10: value
+ // x11: index
+ // x13: array
+ index_check(x13, x11); // prefer index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2);
+ __ shadd(t0, x11, x13, t0, 2);
+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg);
+}
+
+void TemplateTable::dastore() {
+ transition(dtos, vtos);
+ __ pop_i(x11);
+ __ pop_ptr(x13);
+ // f10: value
+ // x11: index
+ // x13: array
+ index_check(x13, x11); // prefer index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3);
+ __ shadd(t0, x11, x13, t0, 3);
+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg);
+}
+
+void TemplateTable::aastore() {
+ Label is_null, ok_is_subtype, done;
+ transition(vtos, vtos);
+ // stack: ..., array, index, value
+ __ ld(x10, at_tos()); // value
+ __ ld(x12, at_tos_p1()); // index
+ __ ld(x13, at_tos_p2()); // array
+
+ index_check(x13, x12); // kills x11
+ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop);
+
+ Address element_address(x14, 0);
+
+ // do array store check - check for NULL value first
+ __ beqz(x10, is_null);
+
+ // Move subklass into x11
+ __ load_klass(x11, x10);
+ // Move superklass into x10
+ __ load_klass(x10, x13);
+ __ ld(x10, Address(x10,
+ ObjArrayKlass::element_klass_offset()));
+ // Compress array + index * oopSize + 12 into a single register. Frees x12.
+
+ // Generate subtype check. Blows x12, x15
+ // Superklass in x10. Subklass in x11.
+ __ gen_subtype_check(x11, ok_is_subtype); //todo
+
+ // Come here on failure
+ // object is at TOS
+ __ j(Interpreter::_throw_ArrayStoreException_entry);
+
+ // Come here on success
+ __ bind(ok_is_subtype);
+
+ // Get the value we will store
+ __ ld(x10, at_tos());
+ // Now store using the appropriate barrier
+ do_oop_store(_masm, element_address, x10, IS_ARRAY);
+ __ j(done);
+
+ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx]
+ __ bind(is_null);
+ __ profile_null_seen(x12);
+
+ // Store a NULL
+ do_oop_store(_masm, element_address, noreg, IS_ARRAY);
+
+ // Pop stack arguments
+ __ bind(done);
+ __ add(esp, esp, 3 * Interpreter::stackElementSize);
+}
+
+void TemplateTable::bastore()
+{
+ transition(itos, vtos);
+ __ pop_i(x11);
+ __ pop_ptr(x13);
+ // x10: value
+ // x11: index
+ // x13: array
+ index_check(x13, x11); // prefer index in x11
+
+ // Need to check whether array is boolean or byte
+ // since both types share the bastore bytecode.
+ __ load_klass(x12, x13);
+ __ lwu(x12, Address(x12, Klass::layout_helper_offset()));
+ Label L_skip;
+ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit());
+ __ beqz(t0, L_skip);
+ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1
+ __ bind(L_skip);
+
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0);
+
+ __ add(x11, x13, x11);
+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg);
+}
+
+void TemplateTable::castore()
+{
+ transition(itos, vtos);
+ __ pop_i(x11);
+ __ pop_ptr(x13);
+ // x10: value
+ // x11: index
+ // x13: array
+ index_check(x13, x11); // prefer index in x11
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1);
+ __ shadd(t0, x11, x13, t0, 1);
+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg);
+}
+
+void TemplateTable::sastore()
+{
+ castore();
+}
+
+void TemplateTable::istore(int n)
+{
+ transition(itos, vtos);
+ __ sd(x10, iaddress(n));
+}
+
+void TemplateTable::lstore(int n)
+{
+ transition(ltos, vtos);
+ __ sd(x10, laddress(n));
+}
+
+void TemplateTable::fstore(int n)
+{
+ transition(ftos, vtos);
+ __ fsw(f10, faddress(n));
+}
+
+void TemplateTable::dstore(int n)
+{
+ transition(dtos, vtos);
+ __ fsd(f10, daddress(n));
+}
+
+void TemplateTable::astore(int n)
+{
+ transition(vtos, vtos);
+ __ pop_ptr(x10);
+ __ sd(x10, iaddress(n));
+}
+
+void TemplateTable::pop()
+{
+ transition(vtos, vtos);
+ __ addi(esp, esp, Interpreter::stackElementSize);
+}
+
+void TemplateTable::pop2()
+{
+ transition(vtos, vtos);
+ __ addi(esp, esp, 2 * Interpreter::stackElementSize);
+}
+
+void TemplateTable::dup()
+{
+ transition(vtos, vtos);
+ __ ld(x10, Address(esp, 0));
+ __ push_reg(x10);
+ // stack: ..., a, a
+}
+
+void TemplateTable::dup_x1()
+{
+ transition(vtos, vtos);
+ // stack: ..., a, b
+ __ ld(x10, at_tos()); // load b
+ __ ld(x12, at_tos_p1()); // load a
+ __ sd(x10, at_tos_p1()); // store b
+ __ sd(x12, at_tos()); // store a
+ __ push_reg(x10); // push b
+ // stack: ..., b, a, b
+}
+
+void TemplateTable::dup_x2()
+{
+ transition(vtos, vtos);
+ // stack: ..., a, b, c
+ __ ld(x10, at_tos()); // load c
+ __ ld(x12, at_tos_p2()); // load a
+ __ sd(x10, at_tos_p2()); // store c in a
+ __ push_reg(x10); // push c
+ // stack: ..., c, b, c, c
+ __ ld(x10, at_tos_p2()); // load b
+ __ sd(x12, at_tos_p2()); // store a in b
+ // stack: ..., c, a, c, c
+ __ sd(x10, at_tos_p1()); // store b in c
+ // stack: ..., c, a, b, c
+}
+
+void TemplateTable::dup2()
+{
+ transition(vtos, vtos);
+ // stack: ..., a, b
+ __ ld(x10, at_tos_p1()); // load a
+ __ push_reg(x10); // push a
+ __ ld(x10, at_tos_p1()); // load b
+ __ push_reg(x10); // push b
+ // stack: ..., a, b, a, b
+}
+
+void TemplateTable::dup2_x1()
+{
+ transition(vtos, vtos);
+ // stack: ..., a, b, c
+ __ ld(x12, at_tos()); // load c
+ __ ld(x10, at_tos_p1()); // load b
+ __ push_reg(x10); // push b
+ __ push_reg(x12); // push c
+ // stack: ..., a, b, c, b, c
+ __ sd(x12, at_tos_p3()); // store c in b
+ // stack: ..., a, c, c, b, c
+ __ ld(x12, at_tos_p4()); // load a
+ __ sd(x12, at_tos_p2()); // store a in 2nd c
+ // stack: ..., a, c, a, b, c
+ __ sd(x10, at_tos_p4()); // store b in a
+ // stack: ..., b, c, a, b, c
+}
+
+void TemplateTable::dup2_x2()
+{
+ transition(vtos, vtos);
+ // stack: ..., a, b, c, d
+ __ ld(x12, at_tos()); // load d
+ __ ld(x10, at_tos_p1()); // load c
+ __ push_reg(x10); // push c
+ __ push_reg(x12); // push d
+ // stack: ..., a, b, c, d, c, d
+ __ ld(x10, at_tos_p4()); // load b
+ __ sd(x10, at_tos_p2()); // store b in d
+ __ sd(x12, at_tos_p4()); // store d in b
+ // stack: ..., a, d, c, b, c, d
+ __ ld(x12, at_tos_p5()); // load a
+ __ ld(x10, at_tos_p3()); // load c
+ __ sd(x12, at_tos_p3()); // store a in c
+ __ sd(x10, at_tos_p5()); // store c in a
+ // stack: ..., c, d, a, b, c, d
+}
+
+void TemplateTable::swap()
+{
+ transition(vtos, vtos);
+ // stack: ..., a, b
+ __ ld(x12, at_tos_p1()); // load a
+ __ ld(x10, at_tos()); // load b
+ __ sd(x12, at_tos()); // store a in b
+ __ sd(x10, at_tos_p1()); // store b in a
+ // stack: ..., b, a
+}
+
+void TemplateTable::iop2(Operation op)
+{
+ transition(itos, itos);
+ // x10 <== x11 op x10
+ __ pop_i(x11);
+ switch (op) {
+ case add : __ addw(x10, x11, x10); break;
+ case sub : __ subw(x10, x11, x10); break;
+ case mul : __ mulw(x10, x11, x10); break;
+ case _and : __ andrw(x10, x11, x10); break;
+ case _or : __ orrw(x10, x11, x10); break;
+ case _xor : __ xorrw(x10, x11, x10); break;
+ case shl : __ sllw(x10, x11, x10); break;
+ case shr : __ sraw(x10, x11, x10); break;
+ case ushr : __ srlw(x10, x11, x10); break;
+ default : ShouldNotReachHere();
+ }
+}
+
+void TemplateTable::lop2(Operation op)
+{
+ transition(ltos, ltos);
+ // x10 <== x11 op x10
+ __ pop_l(x11);
+ switch (op) {
+ case add : __ add(x10, x11, x10); break;
+ case sub : __ sub(x10, x11, x10); break;
+ case mul : __ mul(x10, x11, x10); break;
+ case _and : __ andr(x10, x11, x10); break;
+ case _or : __ orr(x10, x11, x10); break;
+ case _xor : __ xorr(x10, x11, x10); break;
+ default : ShouldNotReachHere();
+ }
+}
+
+void TemplateTable::idiv()
+{
+ transition(itos, itos);
+ // explicitly check for div0
+ Label no_div0;
+ __ bnez(x10, no_div0);
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+ __ jr(t0);
+ __ bind(no_div0);
+ __ pop_i(x11);
+ // x10 <== x11 idiv x10
+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false);
+}
+
+void TemplateTable::irem()
+{
+ transition(itos, itos);
+ // explicitly check for div0
+ Label no_div0;
+ __ bnez(x10, no_div0);
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+ __ jr(t0);
+ __ bind(no_div0);
+ __ pop_i(x11);
+ // x10 <== x11 irem x10
+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true);
+}
+
+void TemplateTable::lmul()
+{
+ transition(ltos, ltos);
+ __ pop_l(x11);
+ __ mul(x10, x10, x11);
+}
+
+void TemplateTable::ldiv()
+{
+ transition(ltos, ltos);
+ // explicitly check for div0
+ Label no_div0;
+ __ bnez(x10, no_div0);
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+ __ jr(t0);
+ __ bind(no_div0);
+ __ pop_l(x11);
+ // x10 <== x11 ldiv x10
+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false);
+}
+
+void TemplateTable::lrem()
+{
+ transition(ltos, ltos);
+ // explicitly check for div0
+ Label no_div0;
+ __ bnez(x10, no_div0);
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+ __ jr(t0);
+ __ bind(no_div0);
+ __ pop_l(x11);
+ // x10 <== x11 lrem x10
+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true);
+}
+
+void TemplateTable::lshl()
+{
+ transition(itos, ltos);
+ // shift count is in x10
+ __ pop_l(x11);
+ __ sll(x10, x11, x10);
+}
+
+void TemplateTable::lshr()
+{
+ transition(itos, ltos);
+ // shift count is in x10
+ __ pop_l(x11);
+ __ sra(x10, x11, x10);
+}
+
+void TemplateTable::lushr()
+{
+ transition(itos, ltos);
+ // shift count is in x10
+ __ pop_l(x11);
+ __ srl(x10, x11, x10);
+}
+
+void TemplateTable::fop2(Operation op)
+{
+ transition(ftos, ftos);
+ switch (op) {
+ case add:
+ __ pop_f(f11);
+ __ fadd_s(f10, f11, f10);
+ break;
+ case sub:
+ __ pop_f(f11);
+ __ fsub_s(f10, f11, f10);
+ break;
+ case mul:
+ __ pop_f(f11);
+ __ fmul_s(f10, f11, f10);
+ break;
+ case div:
+ __ pop_f(f11);
+ __ fdiv_s(f10, f11, f10);
+ break;
+ case rem:
+ __ fmv_s(f11, f10);
+ __ pop_f(f10);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void TemplateTable::dop2(Operation op)
+{
+ transition(dtos, dtos);
+ switch (op) {
+ case add:
+ __ pop_d(f11);
+ __ fadd_d(f10, f11, f10);
+ break;
+ case sub:
+ __ pop_d(f11);
+ __ fsub_d(f10, f11, f10);
+ break;
+ case mul:
+ __ pop_d(f11);
+ __ fmul_d(f10, f11, f10);
+ break;
+ case div:
+ __ pop_d(f11);
+ __ fdiv_d(f10, f11, f10);
+ break;
+ case rem:
+ __ fmv_d(f11, f10);
+ __ pop_d(f10);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void TemplateTable::ineg()
+{
+ transition(itos, itos);
+ __ negw(x10, x10);
+}
+
+void TemplateTable::lneg()
+{
+ transition(ltos, ltos);
+ __ neg(x10, x10);
+}
+
+void TemplateTable::fneg()
+{
+ transition(ftos, ftos);
+ __ fneg_s(f10, f10);
+}
+
+void TemplateTable::dneg()
+{
+ transition(dtos, dtos);
+ __ fneg_d(f10, f10);
+}
+
+void TemplateTable::iinc()
+{
+ transition(vtos, vtos);
+ __ load_signed_byte(x11, at_bcp(2)); // get constant
+ locals_index(x12);
+ __ ld(x10, iaddress(x12, x10, _masm));
+ __ addw(x10, x10, x11);
+ __ sd(x10, iaddress(x12, t0, _masm));
+}
+
+void TemplateTable::wide_iinc()
+{
+ transition(vtos, vtos);
+ __ lwu(x11, at_bcp(2)); // get constant and index
+ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend
+ __ zero_extend(x12, x11, 16);
+ __ neg(x12, x12);
+ __ slli(x11, x11, 32);
+ __ srai(x11, x11, 48);
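+ // After revb_h_w_u the low half-word of x11 held the local index (now negated in x12)
+ // and the high half-word holds the increment, sign-extended here by the slli/srai pair.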
+ __ ld(x10, iaddress(x12, t0, _masm));
+ __ addw(x10, x10, x11);
+ __ sd(x10, iaddress(x12, t0, _masm));
+}
+
+void TemplateTable::convert()
+{
+ // Checking
+#ifdef ASSERT
+ {
+ TosState tos_in = ilgl;
+ TosState tos_out = ilgl;
+ switch (bytecode()) {
+ case Bytecodes::_i2l: // fall through
+ case Bytecodes::_i2f: // fall through
+ case Bytecodes::_i2d: // fall through
+ case Bytecodes::_i2b: // fall through
+ case Bytecodes::_i2c: // fall through
+ case Bytecodes::_i2s: tos_in = itos; break;
+ case Bytecodes::_l2i: // fall through
+ case Bytecodes::_l2f: // fall through
+ case Bytecodes::_l2d: tos_in = ltos; break;
+ case Bytecodes::_f2i: // fall through
+ case Bytecodes::_f2l: // fall through
+ case Bytecodes::_f2d: tos_in = ftos; break;
+ case Bytecodes::_d2i: // fall through
+ case Bytecodes::_d2l: // fall through
+ case Bytecodes::_d2f: tos_in = dtos; break;
+ default : ShouldNotReachHere();
+ }
+ switch (bytecode()) {
+ case Bytecodes::_l2i: // fall through
+ case Bytecodes::_f2i: // fall through
+ case Bytecodes::_d2i: // fall through
+ case Bytecodes::_i2b: // fall through
+ case Bytecodes::_i2c: // fall through
+ case Bytecodes::_i2s: tos_out = itos; break;
+ case Bytecodes::_i2l: // fall through
+ case Bytecodes::_f2l: // fall through
+ case Bytecodes::_d2l: tos_out = ltos; break;
+ case Bytecodes::_i2f: // fall through
+ case Bytecodes::_l2f: // fall through
+ case Bytecodes::_d2f: tos_out = ftos; break;
+ case Bytecodes::_i2d: // fall through
+ case Bytecodes::_l2d: // fall through
+ case Bytecodes::_f2d: tos_out = dtos; break;
+ default : ShouldNotReachHere();
+ }
+ transition(tos_in, tos_out);
+ }
+#endif // ASSERT
+
+ // Conversion
+ switch (bytecode()) {
+ case Bytecodes::_i2l:
+ __ sign_extend(x10, x10, 32);
+ break;
+ case Bytecodes::_i2f:
+ __ fcvt_s_w(f10, x10);
+ break;
+ case Bytecodes::_i2d:
+ __ fcvt_d_w(f10, x10);
+ break;
+ case Bytecodes::_i2b:
+ __ sign_extend(x10, x10, 8);
+ break;
+ case Bytecodes::_i2c:
+ __ zero_extend(x10, x10, 16);
+ break;
+ case Bytecodes::_i2s:
+ __ sign_extend(x10, x10, 16);
+ break;
+ case Bytecodes::_l2i:
+ __ addw(x10, x10, zr);
+ break;
+ case Bytecodes::_l2f:
+ __ fcvt_s_l(f10, x10);
+ break;
+ case Bytecodes::_l2d:
+ __ fcvt_d_l(f10, x10);
+ break;
+ case Bytecodes::_f2i:
+ __ fcvt_w_s_safe(x10, f10);
+ break;
+ case Bytecodes::_f2l:
+ __ fcvt_l_s_safe(x10, f10);
+ break;
+ case Bytecodes::_f2d:
+ __ fcvt_d_s(f10, f10);
+ break;
+ case Bytecodes::_d2i:
+ __ fcvt_w_d_safe(x10, f10);
+ break;
+ case Bytecodes::_d2l:
+ __ fcvt_l_d_safe(x10, f10);
+ break;
+ case Bytecodes::_d2f:
+ __ fcvt_s_d(f10, f10);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+void TemplateTable::lcmp()
+{
+ transition(ltos, itos);
+ __ pop_l(x11);
+ __ cmp_l2i(t0, x11, x10);
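+ // cmp_l2i yields -1, 0 or 1 for less-than, equal and greater-than, as lcmp requires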
+ __ mv(x10, t0);
+}
+
+void TemplateTable::float_cmp(bool is_float, int unordered_result)
+{
+ // For the instructions feq, flt and fle, the result is 0 if either operand is NaN
+ if (is_float) {
+ __ pop_f(f11);
+ // if unordered_result < 0:
+ // we want -1 for unordered or less than, 0 for equal and 1 for
+ // greater than.
+ // else:
+ // we want -1 for less than, 0 for equal and 1 for unordered or
+ // greater than.
+ // f11 primary, f10 secondary
+ __ float_compare(x10, f11, f10, unordered_result);
+ } else {
+ __ pop_d(f11);
+ // if unordered_result < 0:
+ // we want -1 for unordered or less than, 0 for equal and 1 for
+ // greater than.
+ // else:
+ // we want -1 for less than, 0 for equal and 1 for unordered or
+ // greater than.
+ // f11 primary, f10 secondary
+ __ double_compare(x10, f11, f10, unordered_result);
+ }
+}
+
+void TemplateTable::branch(bool is_jsr, bool is_wide)
+{
+ // We might be moving to a safepoint. The thread which calls
+ // Interpreter::notice_safepoints() will effectively flush its cache
+ // when it makes a system call, but we need to do something to
+ // ensure that we see the changed dispatch table.
+ __ membar(MacroAssembler::LoadLoad);
+
+ __ profile_taken_branch(x10, x11);
+ const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
+ InvocationCounter::counter_offset();
+ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
+ InvocationCounter::counter_offset();
+
+ // load branch displacement
+ if (!is_wide) {
+ __ lhu(x12, at_bcp(1));
+ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend
+ } else {
+ __ lwu(x12, at_bcp(1));
+ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend
+ }
+
+ // Handle all the JSR stuff here, then exit.
+ // It's much shorter and cleaner than intermingling with the non-JSR
+ // normal-branch stuff occurring below.
+
+ if (is_jsr) {
+ // compute return address as bci
+ __ ld(t1, Address(xmethod, Method::const_offset()));
+ __ add(t1, t1,
+ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3));
+ __ sub(x11, xbcp, t1);
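+ // x11 = current bci + the jsr length, i.e. the bci of the bytecode following the jsr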
+ __ push_i(x11);
+ // Adjust the bcp by the displacement in x12 (16-bit for jsr, 32-bit for jsr_w)
+ __ add(xbcp, xbcp, x12);
+ __ load_unsigned_byte(t0, Address(xbcp, 0));
+ // load the next target bytecode into t0, it is the argument of dispatch_only
+ __ dispatch_only(vtos, /*generate_poll*/true);
+ return;
+ }
+
+ // Normal (non-jsr) branch handling
+
+ // Adjust the bcp by the displacement in x12
+ __ add(xbcp, xbcp, x12);
+
+ assert(UseLoopCounter || !UseOnStackReplacement,
+ "on-stack-replacement requires loop counters");
+ Label backedge_counter_overflow;
+ Label dispatch;
+ if (UseLoopCounter) {
+ // increment backedge counter for backward branches
+ // x10: MDO
+ // x11: MDO bumped taken-count
+ // x12: target offset
+ __ bgtz(x12, dispatch); // count only if backward branch
+
+ // check if MethodCounters exists
+ Label has_counters;
+ __ ld(t0, Address(xmethod, Method::method_counters_offset()));
+ __ bnez(t0, has_counters);
+ __ push_reg(x10);
+ __ push_reg(x11);
+ __ push_reg(x12);
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::build_method_counters), xmethod);
+ __ pop_reg(x12);
+ __ pop_reg(x11);
+ __ pop_reg(x10);
+ __ ld(t0, Address(xmethod, Method::method_counters_offset()));
+ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory
+ __ bind(has_counters);
+
+ Label no_mdo;
+ int increment = InvocationCounter::count_increment;
+ if (ProfileInterpreter) {
+ // Are we profiling?
+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset())));
+ __ beqz(x11, no_mdo);
+ // Increment the MDO backedge counter
+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) +
+ in_bytes(InvocationCounter::counter_offset()));
+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset()));
+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
+ x10, t0, false,
+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
+ __ j(dispatch);
+ }
+ __ bind(no_mdo);
+ // Increment backedge counter in MethodCounters*
+ __ ld(t0, Address(xmethod, Method::method_counters_offset()));
+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset()));
+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask,
+ x10, t1, false,
+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
+ __ bind(dispatch);
+ }
+
+ // Pre-load the next target bytecode into t0
+ __ load_unsigned_byte(t0, Address(xbcp, 0));
+
+ // continue with the bytecode @ target
+ // t0: target bytecode
+ // xbcp: target bcp
+ __ dispatch_only(vtos, /*generate_poll*/true);
+
+ if (UseLoopCounter && UseOnStackReplacement) {
+ // invocation counter overflow
+ __ bind(backedge_counter_overflow);
+ __ neg(x12, x12);
+ __ add(x12, x12, xbcp); // branch xbcp
+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::frequency_counter_overflow),
+ x12);
+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode
+
+ // x10: osr nmethod (osr ok) or NULL (osr not possible)
+ // x11: target bytecode
+ // x12: temporary
+ __ beqz(x10, dispatch); // test result -- no osr if null
+ // nmethod may have been invalidated (VM may block upon call_VM return)
+ __ lbu(x12, Address(x10, nmethod::state_offset()));
+ if (nmethod::in_use != 0) {
+ __ sub(x12, x12, nmethod::in_use);
+ }
+ __ bnez(x12, dispatch);
+
+ // We have the address of an on stack replacement routine in x10
+ // We need to prepare to execute the OSR method. First we must
+ // migrate the locals and monitors off of the stack.
+
+ __ mv(x9, x10); // save the nmethod
+
+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
+
+ // x10 is OSR buffer, move it to expected parameter location
+ __ mv(j_rarg0, x10);
+
+ // remove activation
+ // get sender esp
+ __ ld(esp,
+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize));
+ // remove frame anchor
+ __ leave();
+ // Ensure compiled code always sees stack at proper alignment
+ __ andi(sp, esp, -16);
+
+ // and begin the OSR nmethod
+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset()));
+ __ jr(t0);
+ }
+}
+
+void TemplateTable::if_0cmp(Condition cc)
+{
+ transition(itos, vtos);
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+
+ __ addw(x10, x10, zr);
+ switch (cc) {
+ case equal:
+ __ bnez(x10, not_taken);
+ break;
+ case not_equal:
+ __ beqz(x10, not_taken);
+ break;
+ case less:
+ __ bgez(x10, not_taken);
+ break;
+ case less_equal:
+ __ bgtz(x10, not_taken);
+ break;
+ case greater:
+ __ blez(x10, not_taken);
+ break;
+ case greater_equal:
+ __ bltz(x10, not_taken);
+ break;
+ default:
+ break;
+ }
+
+ branch(false, false);
+ __ bind(not_taken);
+ __ profile_not_taken_branch(x10);
+}
+
+void TemplateTable::if_icmp(Condition cc)
+{
+ transition(itos, vtos);
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+ __ pop_i(x11);
+ __ addw(x10, x10, zr); // sign-extend the int on tos
+ switch (cc) {
+ case equal:
+ __ bne(x11, x10, not_taken);
+ break;
+ case not_equal:
+ __ beq(x11, x10, not_taken);
+ break;
+ case less:
+ __ bge(x11, x10, not_taken);
+ break;
+ case less_equal:
+ __ bgt(x11, x10, not_taken);
+ break;
+ case greater:
+ __ ble(x11, x10, not_taken);
+ break;
+ case greater_equal:
+ __ blt(x11, x10, not_taken);
+ break;
+ default:
+ break;
+ }
+
+ branch(false, false);
+ __ bind(not_taken);
+ __ profile_not_taken_branch(x10);
+}
+
+void TemplateTable::if_nullcmp(Condition cc)
+{
+ transition(atos, vtos);
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+ if (cc == equal) {
+ __ bnez(x10, not_taken);
+ } else {
+ __ beqz(x10, not_taken);
+ }
+ branch(false, false);
+ __ bind(not_taken);
+ __ profile_not_taken_branch(x10);
+}
+
+void TemplateTable::if_acmp(Condition cc)
+{
+ transition(atos, vtos);
+ // assume branch is more often taken than not (loops use backward branches)
+ Label not_taken;
+ __ pop_ptr(x11);
+
+ if (cc == equal) {
+ __ bne(x11, x10, not_taken);
+ } else if (cc == not_equal) {
+ __ beq(x11, x10, not_taken);
+ }
+ branch(false, false);
+ __ bind(not_taken);
+ __ profile_not_taken_branch(x10);
+}
+
+void TemplateTable::ret() {
+ transition(vtos, vtos);
+ // We might be moving to a safepoint. The thread which calls
+ // Interpreter::notice_safepoints() will effectively flush its cache
+ // when it makes a system call, but we need to do something to
+ // ensure that we see the changed dispatch table.
+ __ membar(MacroAssembler::LoadLoad);
+
+ locals_index(x11);
+ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp
+ __ profile_ret(x11, x12);
+ __ ld(xbcp, Address(xmethod, Method::const_offset()));
+ __ add(xbcp, xbcp, x11);
+ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset()));
+ __ dispatch_next(vtos, 0, /*generate_poll*/true);
+}
+
+void TemplateTable::wide_ret() {
+ transition(vtos, vtos);
+ locals_index_wide(x11);
+ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp
+ __ profile_ret(x11, x12);
+ __ ld(xbcp, Address(xmethod, Method::const_offset()));
+ __ add(xbcp, xbcp, x11);
+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset()));
+ __ dispatch_next(vtos, 0, /*generate_poll*/true);
+}
+
+void TemplateTable::tableswitch() {
+ Label default_case, continue_execution;
+ transition(itos, vtos);
+ // align xbcp
+ __ la(x11, at_bcp(BytesPerInt));
+ __ andi(x11, x11, -BytesPerInt);
+ // load lo & hi
+ __ lwu(x12, Address(x11, BytesPerInt));
+ __ lwu(x13, Address(x11, 2 * BytesPerInt));
+ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend
+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
+ // check against lo & hi
+ __ blt(x10, x12, default_case);
+ __ bgt(x10, x13, default_case);
+ // lookup dispatch offset
+ __ subw(x10, x10, x12);
+ __ shadd(x13, x10, x11, t0, 2);
+ __ lwu(x13, Address(x13, 3 * BytesPerInt));
+ __ profile_switch_case(x10, x11, x12);
+ // continue execution
+ __ bind(continue_execution);
+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
+ __ add(xbcp, xbcp, x13);
+ __ load_unsigned_byte(t0, Address(xbcp));
+ __ dispatch_only(vtos, /*generate_poll*/true);
+ // handle default
+ __ bind(default_case);
+ __ profile_switch_default(x10);
+ __ lwu(x13, Address(x11, 0));
+ __ j(continue_execution);
+}
+
+void TemplateTable::lookupswitch() {
+ transition(itos, itos);
+ __ stop("lookupswitch bytecode should have been rewritten");
+}
+
+void TemplateTable::fast_linearswitch() {
+ transition(itos, vtos);
+ Label loop_entry, loop, found, continue_execution;
+ // bswap x10 so we can avoid bswapping the table entries
+ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend
+ // align xbcp
+ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of
+ // this instruction (change offsets
+ // below)
+ __ andi(x9, x9, -BytesPerInt);
+ // set counter
+ __ lwu(x11, Address(x9, BytesPerInt));
+ __ revb_w(x11, x11);
+ __ j(loop_entry);
+ // table search
+ __ bind(loop);
+ __ shadd(t0, x11, x9, t0, 3);
+ __ lw(t0, Address(t0, 2 * BytesPerInt));
+ __ beq(x10, t0, found);
+ __ bind(loop_entry);
+ __ addi(x11, x11, -1);
+ __ bgez(x11, loop);
+ // default case
+ __ profile_switch_default(x10);
+ __ lwu(x13, Address(x9, 0));
+ __ j(continue_execution);
+ // entry found -> get offset
+ __ bind(found);
+ __ shadd(t0, x11, x9, t0, 3);
+ __ lwu(x13, Address(t0, 3 * BytesPerInt));
+ __ profile_switch_case(x11, x10, x9);
+ // continue execution
+ __ bind(continue_execution);
+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
+ __ add(xbcp, xbcp, x13);
+ __ lbu(t0, Address(xbcp, 0));
+ __ dispatch_only(vtos, /*generate_poll*/true);
+}
+
+void TemplateTable::fast_binaryswitch() {
+ transition(itos, vtos);
+ // Implementation using the following core algorithm:
+ //
+ // int binary_search(int key, LookupswitchPair* array, int n)
+ // binary_search start:
+ // #Binary search according to "Methodik des Programmierens" by
+ // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
+ // int i = 0;
+ // int j = n;
+ // while (i + 1 < j) do
+ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
+ // # with Q: for all i: 0 <= i < n: key < a[i]
+ // # where a stands for the array and assuming that the (non-existing)
+ // # element a[n] is infinitely big.
+ // int h = (i + j) >> 1
+ // # i < h < j
+ // if (key < array[h].fast_match())
+ // then [j = h]
+ // else [i = h]
+ // end
+ // # R: a[i] <= key < a[i+1] or Q
+ // # (i.e., if key is within array, i is the correct index)
+ // return i
+ // binary_search end
+
+
+ // Register allocation
+ const Register key = x10; // already set (tosca)
+ const Register array = x11;
+ const Register i = x12;
+ const Register j = x13;
+ const Register h = x14;
+ const Register temp = x15;
+
+ // Find array start
+ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
+ // get rid of this
+ // instruction (change
+ // offsets below)
+ __ andi(array, array, -BytesPerInt);
+
+ // Initialize i & j
+ __ mv(i, zr); // i = 0
+ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array)
+
+ // Convert j into native byteordering
+ __ revb_w(j, j);
+
+ // And start
+ Label entry;
+ __ j(entry);
+
+ // binary search loop
+ {
+ Label loop;
+ __ bind(loop);
+ __ addw(h, i, j); // h = i + j
+ __ srliw(h, h, 1); // h = (i + j) >> 1
+ // if [key < array[h].fast_match()]
+ // then [j = h]
+ // else [i = h]
+ // Convert array[h].match to native byte-ordering before compare
+ __ shadd(temp, h, array, temp, 3);
+ __ ld(temp, Address(temp, 0));
+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
+
+ Label L_done, L_greater;
+ __ bge(key, temp, L_greater);
+ // if [key < array[h].fast_match()] then j = h
+ __ mv(j, h);
+ __ j(L_done);
+ __ bind(L_greater);
+ // if [key >= array[h].fast_match()] then i = h
+ __ mv(i, h);
+ __ bind(L_done);
+
+ // while [i + 1 < j]
+ __ bind(entry);
+ __ addiw(h, i, 1); // i + 1
+ __ blt(h, j, loop); // i + 1 < j
+ }
+
+ // end of binary search, result index is i (must check again!)
+ Label default_case;
+ // Convert array[i].match to native byte-ordering before compare
+ __ shadd(temp, i, array, temp, 3);
+ __ ld(temp, Address(temp, 0));
+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
+ __ bne(key, temp, default_case);
+
+ // entry found -> j = offset
+ __ shadd(temp, i, array, temp, 3);
+ __ lwu(j, Address(temp, BytesPerInt));
+ __ profile_switch_case(i, key, array);
+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
+
+ __ add(temp, xbcp, j);
+ __ load_unsigned_byte(t0, Address(temp, 0));
+
+ __ add(xbcp, xbcp, j);
+ __ la(xbcp, Address(xbcp, 0));
+ __ dispatch_only(vtos, /*generate_poll*/true);
+
+ // default case -> j = default offset
+ __ bind(default_case);
+ __ profile_switch_default(i);
+ __ lwu(j, Address(array, -2 * BytesPerInt));
+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
+
+ __ add(temp, xbcp, j);
+ __ load_unsigned_byte(t0, Address(temp, 0));
+
+ __ add(xbcp, xbcp, j);
+ __ la(xbcp, Address(xbcp, 0));
+ __ dispatch_only(vtos, /*generate_poll*/true);
+}
+
+void TemplateTable::_return(TosState state)
+{
+ transition(state, state);
+ assert(_desc->calls_vm(),
+ "inconsistent calls_vm information"); // call in remove_activation
+
+ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
+ assert(state == vtos, "only valid state");
+
+ __ ld(c_rarg1, aaddress(0));
+ __ load_klass(x13, c_rarg1);
+ __ lwu(x13, Address(x13, Klass::access_flags_offset()));
+ Label skip_register_finalizer;
+ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER);
+ __ beqz(t0, skip_register_finalizer);
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
+
+ __ bind(skip_register_finalizer);
+ }
+
+ // Issue a StoreStore barrier after all stores but before return
+ // from any constructor for any class with a final field. We don't
+ // know if this is a finalizer, so we always do so.
+ if (_desc->bytecode() == Bytecodes::_return) {
+ __ membar(MacroAssembler::StoreStore);
+ }
+
+ // Narrow result if state is itos but result type is smaller.
+ // Need to narrow in the return bytecode rather than in generate_return_entry
+ // since compiled code callers expect the result to already be narrowed.
+ if (state == itos) {
+ __ narrow(x10);
+ }
+
+ __ remove_activation(state);
+ __ ret();
+}
+
+
+// ----------------------------------------------------------------------------
+// Volatile variables demand their effects be made known to all CPU's
+// in order. Store buffers on most chips allow reads & writes to
+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
+// without some kind of memory barrier (i.e., it's not sufficient that
+// the interpreter does not reorder volatile references, the hardware
+// also must not reorder them).
+//
+// According to the new Java Memory Model (JMM):
+// (1) All volatiles are serialized with respect to each other. ALSO reads &
+// writes act as acquire & release, so:
+// (2) A read cannot let unrelated NON-volatile memory refs that
+// happen after the read float up to before the read. It's OK for
+// non-volatile memory refs that happen before the volatile read to
+// float down below it.
+// (3) Similarly, a volatile write cannot let unrelated NON-volatile
+// memory refs that happen BEFORE the write float down to after the
+// write. It's OK for non-volatile memory refs that happen after the
+// volatile write to float up before it.
+//
+// We only put in barriers around volatile refs (they are expensive),
+// not _between_ memory refs (that would require us to track the
+// flavor of the previous memory refs). Requirements (2) and (3)
+// require some barriers before volatile stores and after volatile
+// loads. These nearly cover requirement (1) but miss the
+// volatile-store-volatile-load case. This final case is placed after
+// volatile-stores although it could just as well go before
+// volatile-loads.
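+//
+// As a rough sketch (derived from the membar calls emitted below in the
+// getfield/putfield paths, not a normative statement of the memory model),
+// the interpreter ends up emitting:
+//
+//   volatile load : load;  membar(LoadLoad | LoadStore)
+//   volatile store: membar(StoreStore | LoadStore);  store;
+//                   membar(StoreLoad | StoreStore)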
+
+void TemplateTable::resolve_cache_and_index(int byte_no,
+ Register Rcache,
+ Register index,
+ size_t index_size) {
+ const Register temp = x9;
+ assert_different_registers(Rcache, index, temp);
+
+ Label resolved, clinit_barrier_slow;
+
+ Bytecodes::Code code = bytecode();
+ switch (code) {
+ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+ default: break;
+ }
+
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
+ __ mv(t0, (int) code);
+ __ beq(temp, t0, resolved);
+
+ // resolve first time through
+ // Class initialization barrier slow path lands here as well.
+ __ bind(clinit_barrier_slow);
+
+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+ __ mv(temp, (int) code);
+ __ call_VM(noreg, entry, temp);
+
+ // Update registers with resolved info
+ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
+ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset
+ // so all clients of this method must be modified accordingly
+ __ bind(resolved);
+
+ // Class initialization barrier for static methods
+ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) {
+ __ load_resolved_method_at_index(byte_no, temp, Rcache);
+ __ load_method_holder(temp, temp);
+ __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow);
+ }
+}
+
+// The Rcache and index registers must be set before call
+// n.b unlike x86 cache already includes the index offset
+void TemplateTable::load_field_cp_cache_entry(Register obj,
+ Register cache,
+ Register index,
+ Register off,
+ Register flags,
+ bool is_static = false) {
+ assert_different_registers(cache, index, flags, off);
+
+ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+ // Field offset
+ __ ld(off, Address(cache, in_bytes(cp_base_offset +
+ ConstantPoolCacheEntry::f2_offset())));
+ // Flags
+ __ lwu(flags, Address(cache, in_bytes(cp_base_offset +
+ ConstantPoolCacheEntry::flags_offset())));
+
+ // klass overwrite register
+ if (is_static) {
+ __ ld(obj, Address(cache, in_bytes(cp_base_offset +
+ ConstantPoolCacheEntry::f1_offset())));
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+ __ ld(obj, Address(obj, mirror_offset));
+ __ resolve_oop_handle(obj);
+ }
+}
+
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+ Register method,
+ Register itable_index,
+ Register flags,
+ bool is_invokevirtual,
+ bool is_invokevfinal, /*unused*/
+ bool is_invokedynamic) {
+ // setup registers
+ const Register cache = t1;
+ const Register index = x14;
+ assert_different_registers(method, flags);
+ assert_different_registers(method, cache, index);
+ assert_different_registers(itable_index, flags);
+ assert_different_registers(itable_index, cache, index);
+ // determine constant pool cache field offsets
+ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+ const int method_offset = in_bytes(ConstantPoolCache::base_offset() +
+ (is_invokevirtual ?
+ ConstantPoolCacheEntry::f2_offset() :
+ ConstantPoolCacheEntry::f1_offset()));
+ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::flags_offset());
+ // access constant pool cache fields
+ const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::f2_offset());
+
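+ // invokedynamic uses a 4-byte index into the constant pool cache;
+ // all other invoke bytecodes use a 2-byte index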
+ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
+ resolve_cache_and_index(byte_no, cache, index, index_size);
+ __ ld(method, Address(cache, method_offset));
+
+ if (itable_index != noreg) {
+ __ ld(itable_index, Address(cache, index_offset));
+ }
+ __ lwu(flags, Address(cache, flags_offset));
+}
+
+// The registers cache and index expected to be set before call.
+// Correct values of the cache and index registers are preserved.
+void TemplateTable::jvmti_post_field_access(Register cache, Register index,
+ bool is_static, bool has_tos) {
+ // do the JVMTI work here to avoid disturbing the register state below
+ // We use c_rarg registers here beacause we want to use the register used in
+ // the call to the VM
+ if (JvmtiExport::can_post_field_access()) {
+ // Check to see if a field access watch has been set before we
+ // take the time to call into the VM.
+ Label L1;
+ assert_different_registers(cache, index, x10);
+ int32_t offset = 0;
+ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset);
+ __ lwu(x10, Address(t0, offset));
+
+ __ beqz(x10, L1);
+
+ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
+ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset())));
+
+ if (is_static) {
+ __ mv(c_rarg1, zr); // NULL object reference
+ } else {
+ __ ld(c_rarg1, at_tos()); // get object pointer without popping it
+ __ verify_oop(c_rarg1);
+ }
+ // c_rarg1: object pointer or NULL
+ // c_rarg2: cache entry pointer
+ // c_rarg3: jvalue object on the stack
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::post_field_access),
+ c_rarg1, c_rarg2, c_rarg3);
+ __ get_cache_and_index_at_bcp(cache, index, 1);
+ __ bind(L1);
+ }
+}
+
+void TemplateTable::pop_and_check_object(Register r)
+{
+ __ pop_ptr(r);
+ __ null_check(r); // for field access must check obj.
+ __ verify_oop(r);
+}
+
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc)
+{
+ const Register cache = x12;
+ const Register index = x13;
+ const Register obj = x14;
+ const Register off = x9;
+ const Register flags = x10;
+ const Register raw_flags = x16;
+ const Register bc = x14; // uses same reg as obj, so don't mix them
+
+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+ jvmti_post_field_access(cache, index, is_static, false);
+ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static);
+
+ if (!is_static) {
+ // obj is on the stack
+ pop_and_check_object(obj);
+ }
+
+ __ add(off, obj, off);
+ const Address field(off);
+
+ Label Done, notByte, notBool, notInt, notShort, notChar,
+ notLong, notFloat, notObj, notDouble;
+
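+ // Extract the tos_state field from the cache entry flags: shift it to the
+ // top of the register and back down so that only the tos_state bits remain
+ // in 'flags' (btos == 0 is the first case tested below)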
+ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift +
+ ConstantPoolCacheEntry::tos_state_bits));
+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits);
+
+ assert(btos == 0, "change code, btos != 0");
+ __ bnez(flags, notByte);
+
+ // Don't rewrite getstatic, only getfield
+ if (is_static) {
+ rc = may_not_rewrite;
+ }
+
+ // btos
+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg);
+ __ push(btos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notByte);
+ __ sub(t0, flags, (u1)ztos);
+ __ bnez(t0, notBool);
+
+ // ztos (same code as btos)
+ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg);
+ __ push(ztos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ // uses btos rewriting; no truncation to the t/f bit is needed for getfield
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notBool);
+ __ sub(t0, flags, (u1)atos);
+ __ bnez(t0, notObj);
+ // atos
+ do_oop_load(_masm, field, x10, IN_HEAP);
+ __ push(atos);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notObj);
+ __ sub(t0, flags, (u1)itos);
+ __ bnez(t0, notInt);
+ // itos
+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg);
+ __ addw(x10, x10, zr); // sign-extend
+ __ push(itos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notInt);
+ __ sub(t0, flags, (u1)ctos);
+ __ bnez(t0, notChar);
+ // ctos
+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg);
+ __ push(ctos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notChar);
+ __ sub(t0, flags, (u1)stos);
+ __ bnez(t0, notShort);
+ // stos
+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg);
+ __ push(stos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notShort);
+ __ sub(t0, flags, (u1)ltos);
+ __ bnez(t0, notLong);
+ // ltos
+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg);
+ __ push(ltos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notLong);
+ __ sub(t0, flags, (u1)ftos);
+ __ bnez(t0, notFloat);
+ // ftos
+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
+ __ push(ftos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11);
+ }
+ __ j(Done);
+
+ __ bind(notFloat);
+#ifdef ASSERT
+ __ sub(t0, flags, (u1)dtos);
+ __ bnez(t0, notDouble);
+#endif
+ // dtos
+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg);
+ __ push(dtos);
+ // Rewrite bytecode to be faster
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11);
+ }
+#ifdef ASSERT
+ __ j(Done);
+
+ __ bind(notDouble);
+ __ stop("Bad state");
+#endif
+
+ __ bind(Done);
+
+ Label notVolatile;
+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+ __ beqz(t0, notVolatile);
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ __ bind(notVolatile);
+}
+
+void TemplateTable::getfield(int byte_no)
+{
+ getfield_or_static(byte_no, false);
+}
+
+void TemplateTable::nofast_getfield(int byte_no) {
+ getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
+void TemplateTable::getstatic(int byte_no)
+{
+ getfield_or_static(byte_no, true);
+}
+
+// The registers cache and index expected to be set before call.
+// The function may destroy various registers, just not the cache and index registers.
+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
+ transition(vtos, vtos);
+
+ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+ if (JvmtiExport::can_post_field_modification()) {
+ // Check to see if a field modification watch has been set before
+ // we take the time to call into the VM.
+ Label L1;
+ assert_different_registers(cache, index, x10);
+ int32_t offset = 0;
+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset);
+ __ lwu(x10, Address(t0, offset));
+ __ beqz(x10, L1);
+
+ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1);
+
+ if (is_static) {
+ // Life is simple. Null out the object pointer.
+ __ mv(c_rarg1, zr);
+ } else {
+ // Life is harder. The stack holds the value on top, followed by
+ // the object. We don't know the size of the value, though; it
+ // could be one or two words depending on its type. As a result,
+ // we must find the type to determine where the object is.
+ __ lwu(c_rarg3, Address(c_rarg2,
+ in_bytes(cp_base_offset +
+ ConstantPoolCacheEntry::flags_offset())));
+ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift);
+ ConstantPoolCacheEntry::verify_tos_state_shift();
+ Label nope2, done, ok;
+ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue
+ __ sub(t0, c_rarg3, ltos);
+ __ beqz(t0, ok);
+ __ sub(t0, c_rarg3, dtos);
+ __ bnez(t0, nope2);
+ __ bind(ok);
+ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue);
+ __ bind(nope2);
+ }
+ // cache entry pointer
+ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset));
+ // object (tos)
+ __ mv(c_rarg3, esp);
+ // c_rarg1: object pointer set up above (NULL if static)
+ // c_rarg2: cache entry pointer
+ // c_rarg3: jvalue object on the stack
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::post_field_modification),
+ c_rarg1, c_rarg2, c_rarg3);
+ __ get_cache_and_index_at_bcp(cache, index, 1);
+ __ bind(L1);
+ }
+}
+
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
+ transition(vtos, vtos);
+
+ const Register cache = x12;
+ const Register index = x13;
+ const Register obj = x12;
+ const Register off = x9;
+ const Register flags = x10;
+ const Register bc = x14;
+
+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+ jvmti_post_field_mod(cache, index, is_static);
+ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+
+ Label Done;
+ __ mv(x15, flags);
+
+ {
+ Label notVolatile;
+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+ __ beqz(t0, notVolatile);
+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
+ __ bind(notVolatile);
+ }
+
+ Label notByte, notBool, notInt, notShort, notChar,
+ notLong, notFloat, notObj, notDouble;
+
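+ // Extract tos_state from the flags, using the same shift trick as in
+ // getfield_or_static above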
+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift +
+ ConstantPoolCacheEntry::tos_state_bits));
+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits);
+
+ assert(btos == 0, "change code, btos != 0");
+ __ bnez(flags, notByte);
+
+ // Don't rewrite putstatic, only putfield
+ if (is_static) {
+ rc = may_not_rewrite;
+ }
+
+ // btos
+ {
+ __ pop(btos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0); // off register as temporary register.
+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notByte);
+ __ sub(t0, flags, (u1)ztos);
+ __ bnez(t0, notBool);
+
+ // ztos
+ {
+ __ pop(ztos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notBool);
+ __ sub(t0, flags, (u1)atos);
+ __ bnez(t0, notObj);
+
+ // atos
+ {
+ __ pop(atos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ // Store into the field
+ do_oop_store(_masm, field, x10, IN_HEAP);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notObj);
+ __ sub(t0, flags, (u1)itos);
+ __ bnez(t0, notInt);
+
+ // itos
+ {
+ __ pop(itos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notInt);
+ __ sub(t0, flags, (u1)ctos);
+ __ bnez(t0, notChar);
+
+ // ctos
+ {
+ __ pop(ctos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notChar);
+ __ sub(t0, flags, (u1)stos);
+ __ bnez(t0, notShort);
+
+ // stos
+ {
+ __ pop(stos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notShort);
+ __ sub(t0, flags, (u1)ltos);
+ __ bnez(t0, notLong);
+
+ // ltos
+ {
+ __ pop(ltos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notLong);
+ __ sub(t0, flags, (u1)ftos);
+ __ bnez(t0, notFloat);
+
+ // ftos
+ {
+ __ pop(ftos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no);
+ }
+ __ j(Done);
+ }
+
+ __ bind(notFloat);
+#ifdef ASSERT
+ __ sub(t0, flags, (u1)dtos);
+ __ bnez(t0, notDouble);
+#endif
+
+ // dtos
+ {
+ __ pop(dtos);
+ // field address
+ if (!is_static) {
+ pop_and_check_object(obj);
+ }
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0);
+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg);
+ if (rc == may_rewrite) {
+ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no);
+ }
+ }
+
+#ifdef ASSERT
+ __ j(Done);
+
+ __ bind(notDouble);
+ __ stop("Bad state");
+#endif
+
+ __ bind(Done);
+
+ {
+ Label notVolatile;
+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+ __ beqz(t0, notVolatile);
+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore);
+ __ bind(notVolatile);
+ }
+}
+
+void TemplateTable::putfield(int byte_no)
+{
+ putfield_or_static(byte_no, false);
+}
+
+void TemplateTable::nofast_putfield(int byte_no) {
+ putfield_or_static(byte_no, false, may_not_rewrite);
+}
+
+void TemplateTable::putstatic(int byte_no) {
+ putfield_or_static(byte_no, true);
+}
+
+void TemplateTable::jvmti_post_fast_field_mod()
+{
+ if (JvmtiExport::can_post_field_modification()) {
+ // Check to see if a field modification watch has been set before
+ // we take the time to call into the VM.
+ Label L2;
+ int32_t offset = 0;
+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset);
+ __ lwu(c_rarg3, Address(t0, offset));
+ __ beqz(c_rarg3, L2);
+ __ pop_ptr(x9); // copy the object pointer from tos
+ __ verify_oop(x9);
+ __ push_ptr(x9); // put the object pointer back on tos
+ // Save tos values before call_VM() clobbers them. Since we have
+ // to do it for every data type, we use the saved values as the
+ // jvalue object.
+ switch (bytecode()) { // load values into the jvalue object
+ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break;
+ case Bytecodes::_fast_bputfield: // fall through
+ case Bytecodes::_fast_zputfield: // fall through
+ case Bytecodes::_fast_sputfield: // fall through
+ case Bytecodes::_fast_cputfield: // fall through
+ case Bytecodes::_fast_iputfield: __ push_i(x10); break;
+ case Bytecodes::_fast_dputfield: __ push_d(); break;
+ case Bytecodes::_fast_fputfield: __ push_f(); break;
+ case Bytecodes::_fast_lputfield: __ push_l(x10); break;
+
+ default:
+ ShouldNotReachHere();
+ }
+ __ mv(c_rarg3, esp); // points to jvalue on the stack
+ // access constant pool cache entry
+ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1);
+ __ verify_oop(x9);
+ // x9: object pointer copied above
+ // c_rarg2: cache entry pointer
+ // c_rarg3: jvalue object on the stack
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::post_field_modification),
+ x9, c_rarg2, c_rarg3);
+
+ switch (bytecode()) { // restore tos values
+ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break;
+ case Bytecodes::_fast_bputfield: // fall through
+ case Bytecodes::_fast_zputfield: // fall through
+ case Bytecodes::_fast_sputfield: // fall through
+ case Bytecodes::_fast_cputfield: // fall through
+ case Bytecodes::_fast_iputfield: __ pop_i(x10); break;
+ case Bytecodes::_fast_dputfield: __ pop_d(); break;
+ case Bytecodes::_fast_fputfield: __ pop_f(); break;
+ case Bytecodes::_fast_lputfield: __ pop_l(x10); break;
+ default: break;
+ }
+ __ bind(L2);
+ }
+}
+
+void TemplateTable::fast_storefield(TosState state)
+{
+ transition(state, vtos);
+
+ ByteSize base = ConstantPoolCache::base_offset();
+
+ jvmti_post_fast_field_mod();
+
+ // access constant pool cache
+ __ get_cache_and_index_at_bcp(x12, x11, 1);
+
+ // Must prevent reordering of the following cp cache loads with bytecode load
+ __ membar(MacroAssembler::LoadLoad);
+
+ // test for volatile with x13
+ __ lwu(x13, Address(x12, in_bytes(base +
+ ConstantPoolCacheEntry::flags_offset())));
+
+ // replace index with field offset from cache entry
+ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
+
+ {
+ Label notVolatile;
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+ __ beqz(t0, notVolatile);
+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
+ __ bind(notVolatile);
+ }
+
+ // Get object from stack
+ pop_and_check_object(x12);
+
+ // field address
+ __ add(x11, x12, x11);
+ const Address field(x11, 0);
+
+ // access field
+ switch (bytecode()) {
+ case Bytecodes::_fast_aputfield:
+ do_oop_store(_masm, field, x10, IN_HEAP);
+ break;
+ case Bytecodes::_fast_lputfield:
+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg);
+ break;
+ case Bytecodes::_fast_iputfield:
+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg);
+ break;
+ case Bytecodes::_fast_zputfield:
+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg);
+ break;
+ case Bytecodes::_fast_bputfield:
+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg);
+ break;
+ case Bytecodes::_fast_sputfield:
+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg);
+ break;
+ case Bytecodes::_fast_cputfield:
+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg);
+ break;
+ case Bytecodes::_fast_fputfield:
+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg);
+ break;
+ case Bytecodes::_fast_dputfield:
+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ {
+ Label notVolatile;
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+ __ beqz(t0, notVolatile);
+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore);
+ __ bind(notVolatile);
+ }
+}
+
+void TemplateTable::fast_accessfield(TosState state)
+{
+ transition(atos, state);
+ // Do the JVMTI work here to avoid disturbing the register state below
+ if (JvmtiExport::can_post_field_access()) {
+ // Check to see if a field access watch has been set before we
+ // take the time to call into the VM.
+ Label L1;
+ int32_t offset = 0;
+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset);
+ __ lwu(x12, Address(t0, offset));
+ __ beqz(x12, L1);
+ // access constant pool cache entry
+ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1);
+ __ verify_oop(x10);
+ __ push_ptr(x10); // save object pointer before call_VM() clobbers it
+ __ mv(c_rarg1, x10);
+ // c_rarg1: object pointer copied above
+ // c_rarg2: cache entry pointer
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::post_field_access),
+ c_rarg1, c_rarg2);
+ __ pop_ptr(x10); // restore object pointer
+ __ bind(L1);
+ }
+
+ // access constant pool cache
+ __ get_cache_and_index_at_bcp(x12, x11, 1);
+
+ // Must prevent reordering of the following cp cache loads with bytecode load
+ __ membar(MacroAssembler::LoadLoad);
+
+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::f2_offset())));
+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::flags_offset())));
+
+ // x10: object
+ __ verify_oop(x10);
+ __ null_check(x10);
+ __ add(x11, x10, x11);
+ const Address field(x11, 0);
+
+ // access field
+ switch (bytecode()) {
+ case Bytecodes::_fast_agetfield:
+ do_oop_load(_masm, field, x10, IN_HEAP);
+ __ verify_oop(x10);
+ break;
+ case Bytecodes::_fast_lgetfield:
+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg);
+ break;
+ case Bytecodes::_fast_igetfield:
+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg);
+ __ addw(x10, x10, zr); // sign-extend
+ break;
+ case Bytecodes::_fast_bgetfield:
+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg);
+ break;
+ case Bytecodes::_fast_sgetfield:
+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg);
+ break;
+ case Bytecodes::_fast_cgetfield:
+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg);
+ break;
+ case Bytecodes::_fast_fgetfield:
+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
+ break;
+ case Bytecodes::_fast_dgetfield:
+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ {
+ Label notVolatile;
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+ __ beqz(t0, notVolatile);
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ __ bind(notVolatile);
+ }
+}
+
+void TemplateTable::fast_xaccess(TosState state)
+{
+ transition(vtos, state);
+
+ // get receiver
+ __ ld(x10, aaddress(0));
+ // access constant pool cache
+ __ get_cache_and_index_at_bcp(x12, x13, 2);
+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::f2_offset())));
+
+ // make sure exception is reported in correct bcp range (getfield is
+ // next instruction)
+ __ addi(xbcp, xbcp, 1);
+ __ null_check(x10);
+ switch (state) {
+ case itos:
+ __ add(x10, x10, x11);
+ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg);
+ __ addw(x10, x10, zr); // sign-extend
+ break;
+ case atos:
+ __ add(x10, x10, x11);
+ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP);
+ __ verify_oop(x10);
+ break;
+ case ftos:
+ __ add(x10, x10, x11);
+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ {
+ Label notVolatile;
+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+ ConstantPoolCacheEntry::flags_offset())));
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+ __ beqz(t0, notVolatile);
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ __ bind(notVolatile);
+ }
+
+ __ sub(xbcp, xbcp, 1);
+}
+
+//-----------------------------------------------------------------------------
+// Calls
+
+void TemplateTable::prepare_invoke(int byte_no,
+ Register method, // linked method (or i-klass)
+ Register index, // itable index, MethodType, etc.
+ Register recv, // if caller wants to see it
+ Register flags // if caller wants to test it
+ ) {
+ // determine flags
+ const Bytecodes::Code code = bytecode();
+ const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
+ const bool is_invokedynamic = code == Bytecodes::_invokedynamic;
+ const bool is_invokehandle = code == Bytecodes::_invokehandle;
+ const bool is_invokevirtual = code == Bytecodes::_invokevirtual;
+ const bool is_invokespecial = code == Bytecodes::_invokespecial;
+ const bool load_receiver = (recv != noreg);
+ const bool save_flags = (flags != noreg);
+ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
+ assert(flags == noreg || flags == x13, "");
+ assert(recv == noreg || recv == x12, "");
+
+ // setup registers & access constant pool cache
+ if (recv == noreg) {
+ recv = x12;
+ }
+ if (flags == noreg) {
+ flags = x13;
+ }
+ assert_different_registers(method, index, recv, flags);
+
+ // save 'interpreter return address'
+ __ save_bcp();
+
+ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+ // maybe push appendix to arguments (just before return address)
+ if (is_invokedynamic || is_invokehandle) {
+ Label L_no_push;
+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift);
+ __ beqz(t0, L_no_push);
+ // Push the appendix as a trailing parameter.
+ // This must be done before we get the receiver,
+ // since the parameter_size includes it.
+ __ push_reg(x9);
+ __ mv(x9, index);
+ __ load_resolved_reference_at_index(index, x9);
+ __ pop_reg(x9);
+ __ push_reg(index); // push appendix (MethodType, CallSite, etc.)
+ __ bind(L_no_push);
+ }
+
+ // load receiver if needed (note: no return address pushed yet)
+ if (load_receiver) {
+ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = (1 << 8) - 1
+ __ shadd(t0, recv, esp, t0, 3);
+ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1)));
+ __ verify_oop(recv);
+ }
+
+ // compute return type
+ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits));
+ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // tos_state occupies flags bits 28..31; move it into t1 bits 0..3
+
+ // load return address
+ {
+ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
+ __ mv(t0, table_addr);
+ __ shadd(t0, t1, t0, t1, 3);
+ __ ld(ra, Address(t0, 0));
+ }
+}
+
+void TemplateTable::invokevirtual_helper(Register index,
+ Register recv,
+ Register flags)
+{
+ // Uses temporary registers x10, x13
+ assert_different_registers(index, recv, x10, x13);
+ // Test for an invoke of a final method
+ Label notFinal;
+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift);
+ __ beqz(t0, notFinal);
+
+ const Register method = index; // method must be xmethod
+ assert(method == xmethod, "Method must be xmethod for interpreter calling convention");
+
+ // do the call - the index is actually the method to call
+ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
+
+ // It's final, need a null check here!
+ __ null_check(recv);
+
+ // profile this call
+ __ profile_final_call(x10);
+ __ profile_arguments_type(x10, method, x14, true);
+
+ __ jump_from_interpreted(method);
+
+ __ bind(notFinal);
+
+ // get receiver klass
+ __ null_check(recv, oopDesc::klass_offset_in_bytes());
+ __ load_klass(x10, recv);
+
+ // profile this call
+ __ profile_virtual_call(x10, xlocals, x13);
+
+ // get target Method & entry point
+ __ lookup_virtual_method(x10, index, method);
+ __ profile_arguments_type(x13, method, x14, true);
+ __ jump_from_interpreted(method);
+}
+
+void TemplateTable::invokevirtual(int byte_no)
+{
+ transition(vtos, vtos);
+ assert(byte_no == f2_byte, "use this argument");
+
+ prepare_invoke(byte_no, xmethod, noreg, x12, x13);
+
+ // xmethod: index (actually a Method*)
+ // x12: receiver
+ // x13: flags
+
+ invokevirtual_helper(xmethod, x12, x13);
+}
+
+void TemplateTable::invokespecial(int byte_no)
+{
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+
+ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method*
+ x12); // get receiver also for null check
+ __ verify_oop(x12);
+ __ null_check(x12);
+ // do the call
+ __ profile_call(x10);
+ __ profile_arguments_type(x10, xmethod, xbcp, false);
+ __ jump_from_interpreted(xmethod);
+}
+
+void TemplateTable::invokestatic(int byte_no)
+{
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+
+ prepare_invoke(byte_no, xmethod); // get f1 Method*
+ // do the call
+ __ profile_call(x10);
+ __ profile_arguments_type(x10, xmethod, x14, false);
+ __ jump_from_interpreted(xmethod);
+}
+
+void TemplateTable::fast_invokevfinal(int byte_no)
+{
+ __ call_Unimplemented();
+}
+
+void TemplateTable::invokeinterface(int byte_no) {
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+
+ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method*
+ x12, x13); // recv, flags
+
+ // x10: interface klass (from f1)
+ // xmethod: method (from f2)
+ // x12: receiver
+ // x13: flags
+
+ // First check for Object case, then private interface method,
+ // then regular interface method.
+
+ // Special case of invokeinterface called for virtual method of
+ // java.lang.Object. See cpCache.cpp for details
+ Label notObjectMethod;
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift);
+ __ beqz(t0, notObjectMethod);
+
+ invokevirtual_helper(xmethod, x12, x13);
+ __ bind(notObjectMethod);
+
+ Label no_such_interface;
+
+ // Check for private method invocation - indicated by vfinal
+ Label notVFinal;
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift);
+ __ beqz(t0, notVFinal);
+
+ // Check receiver klass into x13 - also a null check
+ __ null_check(x12, oopDesc::klass_offset_in_bytes());
+ __ load_klass(x13, x12);
+
+ Label subtype;
+ __ check_klass_subtype(x13, x10, x14, subtype);
+ // If we get here the typecheck failed
+ __ j(no_such_interface);
+ __ bind(subtype);
+
+ __ profile_final_call(x10);
+ __ profile_arguments_type(x10, xmethod, x14, true);
+ __ jump_from_interpreted(xmethod);
+
+ __ bind(notVFinal);
+
+ // Get receiver klass into x13 - also a null check
+ __ restore_locals();
+ __ null_check(x12, oopDesc::klass_offset_in_bytes());
+ __ load_klass(x13, x12);
+
+ Label no_such_method;
+
+ // Preserve method for the throw_AbstractMethodErrorVerbose.
+ __ mv(x28, xmethod);
+ // Receiver subtype check against REFC.
+ // Superklass in x10. Subklass in x13. Blows t1, x30
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index
+ x13, x10, noreg,
+ // outputs: scan temp. reg, scan temp. reg
+ t1, x30,
+ no_such_interface,
+ /*return_method=*/false);
+
+ // profile this call
+ __ profile_virtual_call(x13, x30, x9);
+
+ // Get declaring interface class from method, and itable index
+ __ load_method_holder(x10, xmethod);
+ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset()));
+ __ subw(xmethod, xmethod, Method::itable_index_max);
+ __ negw(xmethod, xmethod);
+
+ // Preserve recvKlass for throw_AbstractMethodErrorVerbose
+ __ mv(xlocals, x13);
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index
+ xlocals, x10, xmethod,
+ // outputs: method, scan temp. reg
+ xmethod, x30,
+ no_such_interface);
+
+ // xmethod: Method to call
+ // x12: receiver
+ // Check for abstract method error
+ // Note: This should be done more efficiently via a throw_abstract_method_error
+ // interpreter entry point and a conditional jump to it in case of a null
+ // method.
+ __ beqz(xmethod, no_such_method);
+
+ __ profile_arguments_type(x13, xmethod, x30, true);
+
+ // do the call
+ // x12: receiver
+ // xmethod: Method
+ __ jump_from_interpreted(xmethod);
+ __ should_not_reach_here();
+
+ // exception handling code follows ...
+ // note: must restore interpreter registers to canonical
+ // state for exception handling to work correctly!
+
+ __ bind(no_such_method);
+ // throw exception
+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed)
+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
+ // Pass arguments for generating a verbose error message.
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28);
+ // the call_VM checks for exception, so we should never return here.
+ __ should_not_reach_here();
+
+ __ bind(no_such_interface);
+ // throw exception
+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed)
+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
+ // Pass arguments for generating a verbose error message.
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10);
+ // the call_VM checks for exception, so we should never return here.
+ __ should_not_reach_here();
+ return;
+}
+
+void TemplateTable::invokehandle(int byte_no) {
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+
+ prepare_invoke(byte_no, xmethod, x10, x12);
+ __ verify_method_ptr(x12);
+ __ verify_oop(x12);
+ __ null_check(x12);
+
+ // FIXME: profile the LambdaForm also
+
+ // x30 is safe to use here as a temp reg because it is about to
+ // be clobbered by jump_from_interpreted().
+ __ profile_final_call(x30);
+ __ profile_arguments_type(x30, xmethod, x14, true);
+
+ __ jump_from_interpreted(xmethod);
+}
+
+void TemplateTable::invokedynamic(int byte_no) {
+ transition(vtos, vtos);
+ assert(byte_no == f1_byte, "use this argument");
+
+ prepare_invoke(byte_no, xmethod, x10);
+
+ // x10: CallSite object (from cpool->resolved_references[])
+ // xmethod: MH.linkToCallSite method (from f2)
+
+ // Note: x10_callsite is already pushed by prepare_invoke
+
+ // %%% should make a type profile for any invokedynamic that takes a ref argument
+ // profile this call
+ __ profile_call(xbcp);
+ __ profile_arguments_type(x13, xmethod, x30, false);
+
+ __ verify_oop(x10);
+
+ __ jump_from_interpreted(xmethod);
+}
+
+//-----------------------------------------------------------------------------
+// Allocation
+
+void TemplateTable::_new() {
+ transition(vtos, atos);
+
+ __ get_unsigned_2_byte_index_at_bcp(x13, 1);
+ Label slow_case;
+ Label done;
+ Label initialize_header;
+ Label initialize_object; // including clearing the fields
+
+ __ get_cpool_and_tags(x14, x10);
+ // Make sure the class we're about to instantiate has been resolved.
+ // This is done before loading InstanceKlass to be consistent with the order
+ // in which the Constant Pool is updated (see ConstantPool::klass_at_put)
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
+ __ add(t0, x10, x13);
+ __ la(t0, Address(t0, tags_offset));
+ __ membar(MacroAssembler::AnyAny);
+ __ lbu(t0, t0);
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ __ sub(t1, t0, (u1)JVM_CONSTANT_Class);
+ __ bnez(t1, slow_case);
+
+ // get InstanceKlass
+ __ load_resolved_klass_at_offset(x14, x13, x14, t0);
+
+ // make sure klass is initialized & doesn't have finalizer
+ // make sure klass is fully initialized
+ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset()));
+ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized);
+ __ bnez(t1, slow_case);
+
+ // get instance_size in InstanceKlass (scaled to a count of bytes)
+ __ lwu(x13, Address(x14, Klass::layout_helper_offset()));
+ // test to see if it has a finalizer or is malformed in some way
+ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit);
+ __ bnez(t0, slow_case);
+
+ // Allocate the instance:
+ // If TLAB is enabled:
+ // Try to allocate in the TLAB.
+ // If fails, go to the slow path.
+ // Else If inline contiguous allocations are enabled:
+ // Try to allocate in eden.
+ // If fails due to heap end, go to slow path
+ //
+ // If TLAB is enabled OR inline contiguous is enabled:
+ // Initialize the allocation.
+ // Exit.
+ // Go to slow path.
+ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc();
+
+ if (UseTLAB) {
+ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case);
+
+ if (ZeroTLAB) {
+ // the fields have been already cleared
+ __ j(initialize_header);
+ } else {
+ // initialize both the header and fields
+ __ j(initialize_object);
+ }
+ } else {
+ // Allocation in the shared Eden, if allowed.
+ //
+ // x13: instance size in bytes
+ if (allow_shared_alloc) {
+ __ eden_allocate(x10, x13, 0, x28, slow_case);
+ }
+ }
+
+ // If UseTLAB or allow_shared_alloc is true, the object was created above and
+ // still needs to be initialized. Otherwise, skip and go to the slow path.
+ if (UseTLAB || allow_shared_alloc) {
+ // The object is initialized before the header. If the object size is
+ // zero, go directly to the header initialization.
+ __ bind(initialize_object);
+ __ sub(x13, x13, sizeof(oopDesc));
+ __ beqz(x13, initialize_header);
+
+ // Initialize object fields
+ {
+ __ add(x12, x10, sizeof(oopDesc));
+ Label loop;
+ __ bind(loop);
+ __ sd(zr, Address(x12));
+ __ add(x12, x12, BytesPerLong);
+ __ sub(x13, x13, BytesPerLong);
+ __ bnez(x13, loop);
+ }
+
+ // initialize object header only.
+ __ bind(initialize_header);
+ __ mv(t0, (intptr_t)markWord::prototype().value());
+ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes()));
+ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops
+ __ store_klass(x10, x14); // store klass last
+
+ {
+ SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
+ // Trigger dtrace event for fastpath
+ __ push(atos); // save the return value
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(oopDesc*)>(SharedRuntime::dtrace_object_alloc)), x10);
+ __ pop(atos); // restore the return value
+ }
+ __ j(done);
+ }
+
+ // slow case
+ __ bind(slow_case);
+ __ get_constant_pool(c_rarg1);
+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
+ __ verify_oop(x10);
+
+ // continue
+ __ bind(done);
+ // Must prevent reordering of stores for object initialization with stores that publish the new object.
+ __ membar(MacroAssembler::StoreStore);
+}
+
+void TemplateTable::newarray() {
+ transition(itos, atos);
+ __ load_unsigned_byte(c_rarg1, at_bcp(1));
+ __ mv(c_rarg2, x10);
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
+ c_rarg1, c_rarg2);
+ // Must prevent reordering of stores for object initialization with stores that publish the new object.
+ __ membar(MacroAssembler::StoreStore);
+}
+
+void TemplateTable::anewarray() {
+ transition(itos, atos);
+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
+ __ get_constant_pool(c_rarg1);
+ __ mv(c_rarg3, x10);
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
+ c_rarg1, c_rarg2, c_rarg3);
+ // Must prevent reordering of stores for object initialization with stores that publish the new object.
+ __ membar(MacroAssembler::StoreStore);
+}
+
+void TemplateTable::arraylength() {
+ transition(atos, itos);
+ __ null_check(x10, arrayOopDesc::length_offset_in_bytes());
+ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes()));
+}
+
+void TemplateTable::checkcast()
+{
+ transition(atos, atos);
+ Label done, is_null, ok_is_subtype, quicked, resolved;
+ __ beqz(x10, is_null);
+
+ // Get cpool & tags index
+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array
+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index
+ // See if bytecode has already been quicked
+ __ add(t0, x13, Array<u1>::base_offset_in_bytes());
+ __ add(x11, t0, x9);
+ __ membar(MacroAssembler::AnyAny);
+ __ lbu(x11, x11);
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class);
+ __ beqz(t0, quicked);
+
+ __ push(atos); // save receiver for result, and for GC
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+ // vm_result_2 has metadata result
+ __ get_vm_result_2(x10, xthread);
+ __ pop_reg(x13); // restore receiver
+ __ j(resolved);
+
+ // Get superklass in x10 and subklass in x13
+ __ bind(quicked);
+ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check
+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass
+
+ __ bind(resolved);
+ __ load_klass(x9, x13);
+
+ // Generate subtype check. Blows x12, x15. Object in x13.
+ // Superklass in x10. Subklass in x9.
+ __ gen_subtype_check(x9, ok_is_subtype);
+
+ // Come here on failure
+ __ push_reg(x13);
+ // object is at TOS
+ __ j(Interpreter::_throw_ClassCastException_entry);
+
+ // Come here on success
+ __ bind(ok_is_subtype);
+ __ mv(x10, x13); // Restore object in x13
+
+ // Collect counts on whether this test sees NULLs a lot or not.
+ if (ProfileInterpreter) {
+ __ j(done);
+ __ bind(is_null);
+ __ profile_null_seen(x12);
+ } else {
+ __ bind(is_null); // same as 'done'
+ }
+ __ bind(done);
+}
+
+void TemplateTable::instanceof() {
+ transition(atos, itos);
+ Label done, is_null, ok_is_subtype, quicked, resolved;
+ __ beqz(x10, is_null);
+
+ // Get cpool & tags index
+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array
+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index
+ // See if bytecode has already been quicked
+ __ add(t0, x13, Array<u1>::base_offset_in_bytes());
+ __ add(x11, t0, x9);
+ __ membar(MacroAssembler::AnyAny);
+ __ lbu(x11, x11);
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class);
+ __ beqz(t0, quicked);
+
+ __ push(atos); // save receiver for result, and for GC
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+ // vm_result_2 has metadata result
+ __ get_vm_result_2(x10, xthread);
+ __ pop_reg(x13); // restore receiver
+ __ verify_oop(x13);
+ __ load_klass(x13, x13);
+ __ j(resolved);
+
+ // Get superklass in x10 and subklass in x13
+ __ bind(quicked);
+ __ load_klass(x13, x10);
+ __ load_resolved_klass_at_offset(x12, x9, x10, t0);
+
+ __ bind(resolved);
+
+ // Generate subtype check. Blows x12, x15
+ // Superklass in x10. Subklass in x13.
+ __ gen_subtype_check(x13, ok_is_subtype);
+
+ // Come here on failure
+ __ mv(x10, zr);
+ __ j(done);
+ // Come here on success
+ __ bind(ok_is_subtype);
+ __ li(x10, 1);
+
+ // Collect counts on whether this test sees NULLs a lot or not.
+ if (ProfileInterpreter) {
+ __ j(done);
+ __ bind(is_null);
+ __ profile_null_seen(x12);
+ } else {
+ __ bind(is_null); // same as 'done'
+ }
+ __ bind(done);
+ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass
+ // x10 = 1: obj != NULL and obj is an instanceof the specified klass
+}
+
+//-----------------------------------------------------------------------------
+// Breakpoints
+void TemplateTable::_breakpoint() {
+ // Note: We get here even if we are single stepping.
+ // jbug insists on setting breakpoints at every bytecode
+ // even if we are in single step mode.
+
+ transition(vtos, vtos);
+
+ // get the unpatched byte code
+ __ get_method(c_rarg1);
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::get_original_bytecode_at),
+ c_rarg1, xbcp);
+ __ mv(x9, x10);
+
+ // post the breakpoint event
+ __ call_VM(noreg,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
+ xmethod, xbcp);
+
+ // complete the execution of original bytecode
+ __ mv(t0, x9);
+ __ dispatch_only_normal(vtos);
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateTable::athrow() {
+ transition(atos, vtos);
+ __ null_check(x10);
+ __ j(Interpreter::throw_exception_entry());
+}
+
+//-----------------------------------------------------------------------------
+// Synchronization
+//
+// Note: monitorenter & exit are symmetric routines; which is reflected
+// in the assembly code structure as well
+//
+// Stack layout:
+//
+// [expressions ] <--- esp = expression stack top
+// ..
+// [expressions ]
+// [monitor entry] <--- monitor block top = expression stack bot
+// ..
+// [monitor entry]
+// [frame data ] <--- monitor block bot
+// ...
+// [saved fp ] <--- fp
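+//
+// A new monitor entry is created by sliding the expression stack down by
+// entry_size and copying its contents, so the monitor block always stays
+// between the frame data and the expression stack.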
+void TemplateTable::monitorenter()
+{
+ transition(atos, vtos);
+
+ // check for NULL object
+ __ null_check(x10);
+
+ const Address monitor_block_top(
+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+ const Address monitor_block_bot(
+ fp, frame::interpreter_frame_initial_sp_offset * wordSize);
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+ Label allocated;
+
+ // initialize entry pointer
+ __ mv(c_rarg1, zr); // points to free slot or NULL
+
+ // find a free slot in the monitor block (result in c_rarg1)
+ {
+ Label entry, loop, exit, notUsed;
+ __ ld(c_rarg3, monitor_block_top); // points to current entry,
+ // starting with top-most entry
+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom
+
+ __ j(entry);
+
+ __ bind(loop);
+ // check if current entry is used
+ // if not used then remember entry in c_rarg1
+ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
+ __ bnez(t0, notUsed);
+ __ mv(c_rarg1, c_rarg3);
+ __ bind(notUsed);
+ // check if current entry is for same object
+ // if same object then stop searching
+ __ beq(x10, t0, exit);
+ // otherwise advance to next entry
+ __ add(c_rarg3, c_rarg3, entry_size);
+ __ bind(entry);
+ // check if bottom reached
+ // if not at bottom then check this entry
+ __ bne(c_rarg3, c_rarg2, loop);
+ __ bind(exit);
+ }
+
+ __ bnez(c_rarg1, allocated); // check if a slot has been found and
+ // if found, continue with that one
+
+ // allocate one if there's no free slot
+ {
+ Label entry, loop;
+ // 1. compute new pointers // esp: old expression stack top
+ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom
+ __ sub(esp, esp, entry_size); // move expression stack top
+ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom
+ __ mv(c_rarg3, esp); // set start value for copy loop
+ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom
+ __ sub(sp, sp, entry_size); // make room for the monitor
+
+ __ j(entry);
+ // 2. move expression stack contents
+ __ bind(loop);
+ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
+ // word from old location
+ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location
+ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word
+ __ bind(entry);
+ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached. If not at bottom,
+ // then copy next word
+ }
+
+ // call run-time routine
+ // c_rarg1: points to monitor entry
+ __ bind(allocated);
+
+ // Increment bcp to point to the next bytecode, so exception
+ // handling for async. exceptions works correctly.
+ // The object has already been popped from the stack, so the
+ // expression stack looks correct.
+ __ addi(xbcp, xbcp, 1);
+
+ // store object
+ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+ __ lock_object(c_rarg1);
+
+ // check to make sure this monitor doesn't cause stack overflow after locking
+ __ save_bcp(); // in case of exception
+ __ generate_stack_overflow_check(0);
+
+ // The bcp has already been incremented. Just need to dispatch to
+ // next instruction.
+ __ dispatch_next(vtos);
+}
+
+void TemplateTable::monitorexit()
+{
+ transition(atos, vtos);
+
+ // check for NULL object
+ __ null_check(x10);
+
+ const Address monitor_block_top(
+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+ const Address monitor_block_bot(
+ fp, frame::interpreter_frame_initial_sp_offset * wordSize);
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+ Label found;
+
+ // find matching slot
+ {
+ Label entry, loop;
+ __ ld(c_rarg1, monitor_block_top); // points to current entry,
+ // starting with top-most entry
+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom
+ // of monitor block
+ __ j(entry);
+
+ __ bind(loop);
+ // check if current entry is for same object
+ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+ // if same object then stop searching
+ __ beq(x10, t0, found);
+ // otherwise advance to next entry
+ __ add(c_rarg1, c_rarg1, entry_size);
+ __ bind(entry);
+ // check if bottom reached
+ // if not at bottom then check this entry
+ __ bne(c_rarg1, c_rarg2, loop);
+ }
+
+ // error handling. Unlocking was not block-structured
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+ InterpreterRuntime::throw_illegal_monitor_state_exception));
+ __ should_not_reach_here();
+
+ // call run-time routine
+ __ bind(found);
+ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps)
+ __ unlock_object(c_rarg1);
+ __ pop_ptr(x10); // discard object
+}
+
+// Wide instructions
+void TemplateTable::wide()
+{
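+ // The opcode following 'wide' selects the handler: index the wide-entry
+ // dispatch table with it and jump there.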
+ __ load_unsigned_byte(x9, at_bcp(1));
+ __ mv(t0, (address)Interpreter::_wentry_point);
+ __ shadd(t0, x9, t0, t1, 3);
+ __ ld(t0, Address(t0));
+ __ jr(t0);
+}
+
+// Multi arrays
+void TemplateTable::multianewarray() {
+ transition(vtos, atos);
+ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions
+ // last dim is on top of stack; we want address of first one:
+ // first_addr = last_addr + (ndims - 1) * wordSize
+ __ shadd(c_rarg1, x10, esp, c_rarg1, 3);
+ __ sub(c_rarg1, c_rarg1, wordSize);
+ call_VM(x10,
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
+ c_rarg1);
+ __ load_unsigned_byte(x11, at_bcp(3));
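+ // pop the dimension words off the expression stack (ndims * wordSize)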
+ __ shadd(esp, x11, esp, t0, 3);
+}
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fcc86108d2839da91af45bb312fc8fdb27116125
--- /dev/null
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP
+#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP
+
+static void prepare_invoke(int byte_no,
+ Register method, // linked method (or i-klass)
+ Register index = noreg, // itable index, MethodType, etc.
+ Register recv = noreg, // if caller wants to see it
+ Register flags = noreg // if caller wants to test it
+ );
+static void invokevirtual_helper(Register index, Register recv,
+ Register flags);
+
+// Helpers
+static void index_check(Register array, Register index);
+
+#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4f50adb05c3155babdd3a96e32e6b9784e8ac694
--- /dev/null
+++ b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "prims/universalNativeInvoker.hpp"
+#include "utilities/debug.hpp"
+
+address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) {
+ Unimplemented();
+ return nullptr;
+}
diff --git a/src/java.base/unix/native/libnet/InetAddressImplFactory.c b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp
similarity index 56%
rename from src/java.base/unix/native/libnet/InetAddressImplFactory.c
rename to src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp
index 2c799feb105dcf88ee78d2cdfb8493273c9b7817..ce70da72f2e468b52c5275d004eeb84212d4b90c 100644
--- a/src/java.base/unix/native/libnet/InetAddressImplFactory.c
+++ b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp
@@ -1,12 +1,11 @@
/*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
+ * published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
@@ -21,26 +20,23 @@
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
+ *
*/
-#include "java_net_InetAddressImplFactory.h"
+#include "precompiled.hpp"
+#include "prims/universalUpcallHandler.hpp"
+#include "utilities/debug.hpp"
-#include "net_util.h"
+address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) {
+ Unimplemented();
+ return nullptr;
+}
-/************************************************************************
- * InetAddressImplFactory
- */
+address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) {
+ ShouldNotCallThis();
+ return nullptr;
+}
-/*
- * Class: java_net_InetAddressImplFactory
- * Method: isIPv6Supported
- * Signature: ()I
- */
-JNIEXPORT jboolean JNICALL
-Java_java_net_InetAddressImplFactory_isIPv6Supported(JNIEnv *env, jclass cls) {
- if (ipv6_available()) {
- return JNI_TRUE;
- } else {
- return JNI_FALSE;
- }
+bool ProgrammableUpcallHandler::supports_optimized_upcalls() {
+ return false;
}
diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c89133de02800c38da1476eb9e44f4fe2603049
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP
+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*)
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..559f8b5e4ea242666348b9e79d3c887edf9bbe10
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/java.hpp"
+#include "runtime/os.hpp"
+#include "runtime/vm_version.hpp"
+#include "utilities/formatBuffer.hpp"
+#include "utilities/macros.hpp"
+
+#include OS_HEADER_INLINE(os)
+
+const char* VM_Version::_uarch = "";
+uint32_t VM_Version::_initial_vector_length = 0;
+
+void VM_Version::initialize() {
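+ // Populate _features by querying the OS/CPU before deriving flag defaults from it.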
+ get_os_cpu_info();
+
+ if (FLAG_IS_DEFAULT(UseFMA)) {
+ FLAG_SET_DEFAULT(UseFMA, true);
+ }
+
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0);
+ }
+
+ if (UseAES || UseAESIntrinsics) {
+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
+ warning("AES instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+ }
+
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+
+ if (UseSHA) {
+ warning("SHA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseSHA, false);
+ }
+
+ if (UseSHA1Intrinsics) {
+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+ }
+
+ if (UseSHA256Intrinsics) {
+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+ }
+
+ if (UseSHA512Intrinsics) {
+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+ }
+
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
+ if (UseCRC32Intrinsics) {
+ warning("CRC32 intrinsics are not available on this CPU.");
+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+ }
+
+ if (UseCRC32CIntrinsics) {
+ warning("CRC32C intrinsics are not available on this CPU.");
+ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
+ }
+
+ if (UseMD5Intrinsics) {
+ warning("MD5 intrinsics are not available on this CPU.");
+ FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
+ }
+
+ if (UseRVV) {
+ if (!(_features & CPU_V)) {
+ warning("RVV is not supported on this CPU");
+ FLAG_SET_DEFAULT(UseRVV, false);
+ } else {
+ // read vector length from vector CSR vlenb
+ _initial_vector_length = get_current_vector_length();
+ }
+ }
+
+ if (UseRVC && !(_features & CPU_C)) {
+ warning("RVC is not supported on this CPU");
+ FLAG_SET_DEFAULT(UseRVC, false);
+ }
+
+ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) {
+ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true);
+ }
+
+ if (UseZbb) {
+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+ FLAG_SET_DEFAULT(UsePopCountInstruction, true);
+ }
+ } else {
+ FLAG_SET_DEFAULT(UsePopCountInstruction, false);
+ }
+
+ char buf[512];
+ buf[0] = '\0';
+ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch);
+ strcat(buf, "rv64");
+#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name);
+ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED)
+#undef ADD_FEATURE_IF_SUPPORTED
+
+ _features_string = os::strdup(buf);
+
+#ifdef COMPILER2
+ c2_initialize();
+#endif // COMPILER2
+}
+
+#ifdef COMPILER2
+void VM_Version::c2_initialize() {
+ if (UseCMoveUnconditionally) {
+ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false);
+ }
+
+ if (ConditionalMoveLimit > 0) {
+ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0);
+ }
+
+ if (!UseRVV) {
+ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false);
+ }
+
+ if (!UseRVV && MaxVectorSize) {
+ FLAG_SET_DEFAULT(MaxVectorSize, 0);
+ }
+
+ if (!UseRVV) {
+ FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false);
+ }
+
+ if (UseRVV) {
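+ // MaxVectorSize must be a power of two of at least 16 bytes and is
+ // capped at the hardware vector length read from vlenb at startup.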
+ if (FLAG_IS_DEFAULT(MaxVectorSize)) {
+ MaxVectorSize = _initial_vector_length;
+ } else if (MaxVectorSize < 16) {
+ warning("RVV does not support vector length less than 16 bytes. Disabling RVV.");
+ UseRVV = false;
+ } else if (is_power_of_2(MaxVectorSize)) {
+ if (MaxVectorSize > _initial_vector_length) {
+ warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d",
+ _initial_vector_length, _initial_vector_length);
+ }
+ MaxVectorSize = _initial_vector_length;
+ } else {
+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
+ }
+ }
+
+ // disable prefetch
+ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+ }
+
+ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true);
+ }
+
+ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+ }
+
+ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true);
+ }
+
+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
+ }
+
+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
+ }
+}
+#endif // COMPILER2
+
+void VM_Version::initialize_cpu_information(void) {
+ // do nothing if cpu info has been initialized
+ if (_initialized) {
+ return;
+ }
+
+ _no_of_cores = os::processor_count();
+ _no_of_threads = _no_of_cores;
+ _no_of_sockets = _no_of_cores;
+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64");
+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string);
+ _initialized = true;
+}
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1d6eeb97504c94673cc19392ea25f47aebed6240
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP
+#define CPU_RISCV_VM_VERSION_RISCV_HPP
+
+#include "runtime/abstract_vm_version.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/globals_extension.hpp"
+#include "utilities/sizes.hpp"
+
+class VM_Version : public Abstract_VM_Version {
+#ifdef COMPILER2
+private:
+ static void c2_initialize();
+#endif // COMPILER2
+
+protected:
+ static const char* _uarch;
+ static uint32_t _initial_vector_length;
+ static void get_os_cpu_info();
+ static uint32_t get_current_vector_length();
+
+public:
+ // Initialization
+ static void initialize();
+
+ constexpr static bool supports_stack_watermark_barrier() { return true; }
+
+ enum Feature_Flag {
+#define CPU_FEATURE_FLAGS(decl) \
+ decl(I, "i", 8) \
+ decl(M, "m", 12) \
+ decl(A, "a", 0) \
+ decl(F, "f", 5) \
+ decl(D, "d", 3) \
+ decl(C, "c", 2) \
+ decl(V, "v", 21)
+
+#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit),
+ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
+#undef DECLARE_CPU_FEATURE_FLAG
+ };
+
+ static void initialize_cpu_information(void);
+};
+
+#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..aa7222dc64a477f90f549bf10d52c1a7f762b568
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+void VMRegImpl::set_regName() {
+ int i = 0;
+ Register reg = ::as_Register(0);
+ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) {
+ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) {
+ regName[i++] = reg->name();
+ }
+ reg = reg->successor();
+ }
+
+ FloatRegister freg = ::as_FloatRegister(0);
+ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
+ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) {
+ regName[i++] = freg->name();
+ }
+ freg = freg->successor();
+ }
+
+ VectorRegister vreg = ::as_VectorRegister(0);
+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) {
+ for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) {
+ regName[i++] = vreg->name();
+ }
+ vreg = vreg->successor();
+ }
+
+ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) {
+ regName[i] = "NON-GPR-FPR-VPR";
+ }
+}
+
+VMReg VMRegImpl::vmStorageToVMReg(int type, int index) {
+ Unimplemented();
+ return VMRegImpl::Bad();
+}
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9e611b1f67110b05b8f0a7db6447d0d59510e2d7
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VMREG_RISCV_HPP
+#define CPU_RISCV_VMREG_RISCV_HPP
+
+inline bool is_Register() {
+ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;
+}
+
+inline bool is_FloatRegister() {
+ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr;
+}
+
+inline bool is_VectorRegister() {
+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr;
+}
+
+inline Register as_Register() {
+ assert(is_Register(), "must be");
+ return ::as_Register(value() / RegisterImpl::max_slots_per_register);
+}
+
+inline FloatRegister as_FloatRegister() {
+ assert(is_FloatRegister() && is_even(value()), "must be");
+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) /
+ FloatRegisterImpl::max_slots_per_register);
+}
+
+inline VectorRegister as_VectorRegister() {
+ assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be");
+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) /
+ VectorRegisterImpl::max_slots_per_register);
+}
+
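+// Only the first slot of each register is a concrete VMReg: even values for
+// GPRs/FPRs, multiples of the vector slot count for vector registers.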
+inline bool is_concrete() {
+ assert(is_reg(), "must be");
+ if (is_VectorRegister()) {
+ int base = value() - ConcreteRegisterImpl::max_fpr;
+ return (base % VectorRegisterImpl::max_slots_per_register) == 0;
+ } else {
+ return is_even(value());
+ }
+}
+
+#endif // CPU_RISCV_VMREG_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..06b70020b4b98170e89e291d33c465715894c112
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
+#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
+
+inline VMReg RegisterImpl::as_VMReg() const {
+ if (this == noreg) {
+ return VMRegImpl::Bad();
+ }
+ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register);
+}
+
+inline VMReg FloatRegisterImpl::as_VMReg() const {
+ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) +
+ ConcreteRegisterImpl::max_gpr);
+}
+
+inline VMReg VectorRegisterImpl::as_VMReg() const {
+ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) +
+ ConcreteRegisterImpl::max_fpr);
+}
+
+#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..78b81138003f6c37698df683a8ef9c1b6779c08f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "assembler_riscv.inline.hpp"
+#include "code/vtableStubs.hpp"
+#include "interp_masm_riscv.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/instanceKlass.hpp"
+#include "oops/klassVtable.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_riscv.inline.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+// machine-dependent part of VtableStubs: create VtableStub of correct size and
+// initialize its code
+
+#define __ masm->
+
+#ifndef PRODUCT
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
+#endif
+
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
+ // Can be NULL if there is no free space in the code cache.
+ if (s == NULL) {
+ return NULL;
+ }
+
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc = NULL;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
+ MacroAssembler* masm = new MacroAssembler(&cb);
+ assert_cond(masm != NULL);
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+ __ add_memory_int64(Address(t2), 1);
+ }
+#endif
+
+ // get receiver (need to skip return address on top of stack)
+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+
+ // get receiver klass
+ address npe_addr = __ pc();
+ __ load_klass(t2, j_rarg0);
+
+#ifndef PRODUCT
+ if (DebugVtables) {
+ Label L;
+ start_pc = __ pc();
+
+ // check offset vs vtable length
+ __ lwu(t0, Address(t2, Klass::vtable_length_offset()));
+ __ mvw(t1, vtable_index * vtableEntry::size());
+ __ bgt(t0, t1, L);
+ __ enter();
+ __ mv(x12, vtable_index);
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12);
+ const ptrdiff_t estimate = 256;
+ const ptrdiff_t codesize = __ pc() - start_pc;
+ slop_delta = estimate - codesize; // call_VM varies in length, depending on data
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize);
+
+ __ leave();
+ __ bind(L);
+ }
+#endif // PRODUCT
+
+ start_pc = __ pc();
+ __ lookup_virtual_method(t2, vtable_index, xmethod);
+ // lookup_virtual_method generates
+ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld
+ // 1 instruction (best case):ld * 1
+ slop_delta = 16 - (int)(__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
+#ifndef PRODUCT
+ if (DebugVtables) {
+ Label L;
+ __ beqz(xmethod, L);
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+ __ bnez(t0, L);
+ __ stop("Vtable entry is NULL");
+ __ bind(L);
+ }
+#endif // PRODUCT
+
+ // x10: receiver klass
+ // xmethod: Method*
+ // x12: receiver
+ address ame_addr = __ pc();
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+ __ jr(t0);
+
+ masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
+
+ return s;
+}
+
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
+ // Can be NULL if there is no free space in the code cache.
+ if (s == NULL) {
+ return NULL;
+ }
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc = NULL;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
+ MacroAssembler* masm = new MacroAssembler(&cb);
+ assert_cond(masm != NULL);
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+ __ add_memory_int64(Address(x18), 1);
+ }
+#endif
+
+ // get receiver (need to skip return address on top of stack)
+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+
+ // Entry arguments:
+ // t2: CompiledICHolder
+ // j_rarg0: Receiver
+
+ // This stub is called from compiled code which has no callee-saved registers,
+ // so all registers except arguments are free at this point.
+ const Register recv_klass_reg = x18;
+ const Register holder_klass_reg = x19; // declaring interface klass (DEFC)
+ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC)
+ const Register temp_reg = x28;
+ const Register temp_reg2 = x29;
+ const Register icholder_reg = t1;
+
+ Label L_no_such_interface;
+
+ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
+ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
+
+ start_pc = __ pc();
+
+ // get receiver klass (also an implicit null-check)
+ address npe_addr = __ pc();
+ __ load_klass(recv_klass_reg, j_rarg0);
+
+ // Receiver subtype check against REFC.
+ __ lookup_interface_method(// inputs: rec. class, interface
+ recv_klass_reg, resolved_klass_reg, noreg,
+ // outputs: scan temp. reg1, scan temp. reg2
+ temp_reg2, temp_reg,
+ L_no_such_interface,
+ /*return_method=*/false);
+
+ const ptrdiff_t typecheckSize = __ pc() - start_pc;
+ start_pc = __ pc();
+
+ // Get selected method from declaring class and itable index
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index
+ recv_klass_reg, holder_klass_reg, itable_index,
+ // outputs: method, scan temp. reg
+ xmethod, temp_reg,
+ L_no_such_interface);
+
+ const ptrdiff_t lookupSize = __ pc() - start_pc;
+
+ // Reduce "estimate" such that "padding" does not drop below 8.
+ const ptrdiff_t estimate = 256;
+ const ptrdiff_t codesize = typecheckSize + lookupSize;
+ slop_delta = (int)(estimate - codesize);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
+#ifdef ASSERT
+ if (DebugVtables) {
+ Label L2;
+ __ beqz(xmethod, L2);
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+ __ bnez(t0, L2);
+ __ stop("compiler entrypoint is null");
+ __ bind(L2);
+ }
+#endif // ASSERT
+
+ // xmethod: Method*
+ // j_rarg0: receiver
+ address ame_addr = __ pc();
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+ __ jr(t0);
+
+ __ bind(L_no_such_interface);
+ // Handle IncompatibleClassChangeError in itable stubs.
+ // More detailed error message.
+ // We force resolving of the call site by jumping to the "handle
+ // wrong method" stub, and so let the interpreter runtime do all the
+ // dirty work.
+ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+
+ masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
+
+ return s;
+}
+
+int VtableStub::pd_code_alignment() {
+ // RISCV cache line size is not an architected constant. We just align on word size.
+ const unsigned int icache_line_size = wordSize;
+ return icache_line_size;
+}
diff --git a/src/hotspot/cpu/s390/assembler_s390.hpp b/src/hotspot/cpu/s390/assembler_s390.hpp
index 1d64ceebeb03b4013898396bb0eac9c1ce922f06..7a1c31157b0d55645f511019e365d149d7fc991a 100644
--- a/src/hotspot/cpu/s390/assembler_s390.hpp
+++ b/src/hotspot/cpu/s390/assembler_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2021 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2022 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -729,6 +729,7 @@ class Assembler : public AbstractAssembler {
#define SLRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00fb << 16)
#define SLGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00eb << 16)
// RM, Logical
+#define SL_ZOPC (unsigned int)(0x5f << 24)
#define SLY_ZOPC (unsigned long)(227L << 40 | 95L)
#define SLGF_ZOPC (unsigned long)(227L << 40 | 27L)
#define SLG_ZOPC (unsigned long)(227L << 40 | 11L)
@@ -1519,7 +1520,9 @@ class Assembler : public AbstractAssembler {
//-----------------------------------------------
// Calculate length of instruction.
+ static unsigned int instr_len(unsigned char len_bits);
static unsigned int instr_len(unsigned char *instr);
+ static unsigned int instr_len(unsigned long instr);
// Longest instructions are 6 bytes on z/Architecture.
static unsigned int instr_maxlen() { return 6; }
@@ -1597,6 +1600,8 @@ class Assembler : public AbstractAssembler {
static int inv_simm32(long x) { return (inv_s_field(x, 31, 0)); } // 6-byte instructions only
static int inv_uimm12(long x) { return (inv_u_field(x, 11, 0)); } // 4-byte instructions only
+ private:
+
// Encode u_field from long value.
static long u_field(long x, int hi_bit, int lo_bit) {
long r = x << lo_bit;
@@ -1616,10 +1621,21 @@ class Assembler : public AbstractAssembler {
else { guarantee(false, "bad address format"); return 0; }
}
- static int64_t rsmask_48( int64_t d2, Register b2) { return uimm12(d2, 20, 48) | regz(b2, 16, 48); }
- static int64_t rxmask_48( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48); }
- static int64_t rsymask_48(int64_t d2, Register b2) { return simm20(d2) | regz(b2, 16, 48); }
- static int64_t rxymask_48(int64_t d2, Register x2, Register b2) { return simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48); }
+ static int64_t rsmask_32( int64_t d2, Register b2) { return uimm12(d2, 20, 32) | regz(b2, 16, 32); }
+ static int64_t rsmask_48( int64_t d2, Register b2) { return uimm12(d2, 20, 48) | regz(b2, 16, 48); }
+ static int64_t rsmask_SS( int64_t d2, Register b2) { return uimm12(d2, 36, 48) | regz(b2, 32, 48); } // storage-storage instructions
+ static int64_t rsymask_48(int64_t d2, Register b2) { return simm20(d2) | regz(b2, 16, 48); }
+ static int64_t rxmask_32( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 32) | regt(x2, 12, 32) | regz(b2, 16, 32); }
+ static int64_t rxmask_48( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 48) | regt(x2, 12, 48) | regz(b2, 16, 48); }
+ static int64_t rxymask_48(int64_t d2, Register x2, Register b2) { return simm20(d2) | regt(x2, 12, 48) | regz(b2, 16, 48); }
+
+ // For instructions which use address calculation to derive an input value to the instruction.
+ // Shift instructions are an example of such use.
+ static int64_t rsmaskt_32( int64_t d2, Register b2) { return uimm12(d2, 20, 32) | regt(b2, 16, 32); }
+ static int64_t rsmaskt_48( int64_t d2, Register b2) { return uimm12(d2, 20, 48) | regt(b2, 16, 48); }
+ static int64_t rsymaskt_48(int64_t d2, Register b2) { return simm20(d2) | regt(b2, 16, 48); }
+ static int64_t rxmaskt_32( int64_t d2, Register x2, Register b2){ return uimm12(d2, 20, 32) | regt(x2, 12, 32) | regt(b2, 16, 32); }
+ static int64_t rxymaskt_48(int64_t d2, Register x2, Register b2){ return simm20(d2) | regt(x2, 12, 48) | regt(b2, 16, 48); }
// Address calculated from d12(vx,b) - vx is vector index register.
static int64_t rvmask_48( int64_t d2, VectorRegister x2, Register b2) { return uimm12(d2, 20, 48) | vreg(x2, 12) | regz(b2, 16, 48); }
@@ -1717,8 +1733,8 @@ class Assembler : public AbstractAssembler {
(((ui20 >> 12) & 0xffL) << (48-40))); // DH
}
- static long reg(Register r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }
- static long reg(int r, int s, int len) { return u_field(r, (len-s)-1, (len-s)-4); }
+ static long reg(int r, int s, int len) { return u_field(r, (len-s)-1, (len-s)-4); }
+ static long reg( Register r, int s, int len) { return reg(r->encoding(), s, len); }
static long regt(Register r, int s, int len) { return reg(r, s, len); }
static long regz(Register r, int s, int len) { assert(r != Z_R0, "cannot use register R0 in memory access"); return reg(r, s, len); }
@@ -1792,9 +1808,11 @@ class Assembler : public AbstractAssembler {
static unsigned int align(unsigned int x, unsigned int a) { return ((x + (a - 1)) & ~(a - 1)); }
static bool is_aligned(unsigned int x, unsigned int a) { return (0 == x % a); }
+ inline unsigned int emit_instruction(unsigned long x, unsigned int len);
inline void emit_16(int x);
inline void emit_32(int x);
inline void emit_48(long x);
+ inline void emit_data(int x);
// Compare and control flow instructions
// =====================================
@@ -2159,13 +2177,17 @@ class Assembler : public AbstractAssembler {
// sub memory
inline void z_s( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm12+x2+b2) ; int32
- inline void z_sy( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 + *(d2_imm20+s2+b2) ; int32
+ inline void z_sy( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+s2+b2) ; int32
+ inline void z_sl( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_uimm12+x2+b2); int32
+ inline void z_sly( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int32
inline void z_sg( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm12+x2+b2) ; int64
inline void z_sgf( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm12+x2+b2) ; int64 - int32
inline void z_slg( Register r1, int64_t d2, Register x2, Register b2); // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64
inline void z_slgf(Register r1, int64_t d2, Register x2, Register b2); // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64 - uint32
inline void z_s( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32
inline void z_sy( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32
+ inline void z_sl( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32
+ inline void z_sly( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32
inline void z_sg( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int64
inline void z_sgf( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int64 - int32
inline void z_slg( Register r1, const Address& a); // sub r1 = r1 - *(a) ; uint64
diff --git a/src/hotspot/cpu/s390/assembler_s390.inline.hpp b/src/hotspot/cpu/s390/assembler_s390.inline.hpp
index 31b674a55337246fda0316603bcb575f8a0af58e..d45869e4707b8eac79081cd60e125a147618492b 100644
--- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp
+++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2021 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2022 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,30 +30,59 @@
#include "asm/codeBuffer.hpp"
#include "code/codeCache.hpp"
-// Convention: Use Z_R0 and Z_R1 instead of Z_scratch_* in all
-// assembler_s390.* files.
-// Local implementation of byte emitters to help inlining.
+inline unsigned int Assembler::emit_instruction(unsigned long x, unsigned int len) {
+ CodeSection* cs = code_section();
+ address pos = pc();
+ assert(((intptr_t)pos & 0x01L) == 0, "instruction address alignment error");
+ if (len == 0) {
+ len = instr_len(x);
+ }
+
+ switch (len) {
+ case 2:
+ *(unsigned short*)(pos) = (unsigned short)x;
+ break;
+ case 4:
+ // May be unaligned store. Only slightly less efficient.
+ *(unsigned int*)(pos) = (unsigned int)x;
+ break;
+ case 6:
+ // Have to split anyway, so we can arrange for aligned stores.
+ if (((intptr_t)pos & 0x03) == 0) {
+ *(unsigned int*)pos = (unsigned int)(x>>16);
+ *(unsigned short*)(pos+4) = (unsigned short)x;
+ } else {
+ *(unsigned short*)(pos) = (unsigned short)(x>>32);
+ *(unsigned int*)(pos+2) = (unsigned int)x;
+ }
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+
+ assert(instr_len(pos) == len, "%d-byte emitter can't handle %d-byte instructions.", len, instr_len(pos));
+ cs->set_end(pos + len);
+ return len;
+}
inline void Assembler::emit_16(int x) {
- CodeSection* cs = code_section();
- address code_pos = pc();
- *(unsigned short*)code_pos = (unsigned short)x;
- cs->set_end( code_pos + sizeof(unsigned short));
+ emit_instruction((unsigned int)x, 2);
}
inline void Assembler::emit_32(int x) {
- CodeSection* cs = code_section();
- address code_pos = pc();
- *(jint*)code_pos = (jint)x;
- cs->set_end( code_pos + sizeof( jint));
+ emit_instruction((unsigned int)x, 4);
}
inline void Assembler::emit_48(long x) {
+ emit_instruction((unsigned long)x, 6);
+}
+
+inline void Assembler::emit_data(int x) {
CodeSection* cs = code_section();
address code_pos = pc();
- *(unsigned short*)code_pos = (unsigned short)(x>>32);
- *(jint*)(code_pos+sizeof(unsigned short)) = (jint)x;
- cs->set_end( code_pos + sizeof( jint) + sizeof( unsigned short));
+ *(int*)code_pos = x;
+ cs->set_end(code_pos + sizeof(int));
}
// Support lightweight sync (from z196). Experimental as of now. For explanation see *.hpp file.
@@ -104,26 +133,27 @@ inline void Assembler::z_kimd( Register r1, Register r2) { emit_32( KIMD_ZOPC
inline void Assembler::z_klmd( Register r1, Register r2) { emit_32( KLMD_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
inline void Assembler::z_kmac( Register r1, Register r2) { emit_32( KMAC_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); }
-inline void Assembler::z_exrl(Register r1, int64_t i2) { emit_48( EXRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } // z10
-inline void Assembler::z_exrl(Register r1, address a2) { emit_48( EXRL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a2, pc()), 16, 48)); } // z10
+inline void Assembler::z_ex( Register r1, int64_t d2, Register x2, Register b2) { emit_32( EX_ZOPC | regz(r1, 8, 32) | rxmask_32(d2, x2, b2)); }
+inline void Assembler::z_exrl(Register r1, int64_t i2) { emit_48( EXRL_ZOPC | regz(r1, 8, 48) | simm32(i2, 16, 48)); } // z10
+inline void Assembler::z_exrl(Register r1, address a2) { emit_48( EXRL_ZOPC | regz(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a2, pc()), 16, 48)); } // z10
-inline void Assembler::z_ectg(int64_t d1, Register b1, int64_t d2, Register b2, Register r3) { emit_48( ECTG_ZOPC | reg(r3, 8, 48) | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm12(d2, 36, 48) | reg(b2, 32, 48)); }
-inline void Assembler::z_ecag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( ECAG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+inline void Assembler::z_ectg( int64_t d1, Register b1, int64_t d2, Register b2, Register r3) { emit_48( ECTG_ZOPC | reg(r3, 8, 48) | rsmask_48(d1, b1) | rsmask_SS( d2, b2)); }
+inline void Assembler::z_ecag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( ECAG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | rsymaskt_48(d2, b2)); }
//------------------------------
// Interlocked-Update
//------------------------------
-inline void Assembler::z_laa( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAA_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_laag( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_laal( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAL_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_laalg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAALG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_lan( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAN_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_lang( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LANG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_lax( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAX_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_laxg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAXG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_lao( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAO_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_laog( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAOG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_laa( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAA_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_laag( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_laal( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAL_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_laalg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAALG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_lan( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAN_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_lang( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LANG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_lax( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAX_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_laxg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAXG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_lao( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAO_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_laog( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAOG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
inline void Assembler::z_laa( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laa( r1, r3, a.disp12(), a.base()); }
inline void Assembler::z_laag( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laag( r1, r3, a.disp12(), a.base()); }
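// The interlocked-update changes above follow the same pattern as the rest of
// this patch: hand-written field combinations are folded into single operand-mask
// helpers. Judging purely from the removed/added pairs in this hunk (the helpers'
// definitions are not shown here, so the exact register checking they perform is
// an assumption):
//
//   rsymask_48(d2, b2)  stands in for  simm20(d2)         | regz(b2, 16, 48)   (LAA .. LAOG)
//   rsmask_48(d1, b1)   stands in for  uimm12(d1, 20, 48) | reg(b1, 16, 48)    (ECTG)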
@@ -139,33 +169,33 @@ inline void Assembler::z_laog( Register r1, Register r3, const Address& a) { ass
//--------------------------------
// Execution Prediction
//--------------------------------
-inline void Assembler::z_pfd( int64_t m1, int64_t d2, Register x2, Register b2) { emit_48( PFD_ZOPC | uimm4(m1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_pfd( int64_t m1, int64_t d2, Register x2, Register b2) { emit_48( PFD_ZOPC | uimm4(m1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_pfd( int64_t m1, Address a) { z_pfd(m1, a.disp(), a.indexOrR0(), a.base()); }
inline void Assembler::z_pfdrl(int64_t m1, int64_t i2) { emit_48( PFDRL_ZOPC | uimm4(m1, 8, 48) | simm32(i2, 16, 48)); }
-inline void Assembler::z_bpp( int64_t m1, int64_t i2, int64_t d3, Register b3) { emit_48( BPP_ZOPC | uimm4(m1, 8, 48) | uimm12(d3, 20, 48) | reg(b3, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_bpp( int64_t m1, int64_t i2, int64_t d3, Register b3) { emit_48( BPP_ZOPC | uimm4(m1, 8, 48) | rsmask_48(d3, b3) | simm16(i2, 32, 48)); }
inline void Assembler::z_bprp( int64_t m1, int64_t i2, int64_t i3) { emit_48( BPRP_ZOPC | uimm4(m1, 8, 48) | simm12(i2, 12, 48) | simm24(i3, 24, 48)); }
//-------------------------------
// Transaction Control
//-------------------------------
-inline void Assembler::z_tbegin( int64_t d1, Register b1, int64_t i2) { emit_48( TBEGIN_ZOPC | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); }
-inline void Assembler::z_tbeginc(int64_t d1, Register b1, int64_t i2) { emit_48( TBEGINC_ZOPC | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); }
+inline void Assembler::z_tbegin( int64_t d1, Register b1, int64_t i2) { emit_48( TBEGIN_ZOPC | rsmask_48(d1, b1) | uimm16(i2, 32, 48)); }
+inline void Assembler::z_tbeginc(int64_t d1, Register b1, int64_t i2) { emit_48( TBEGINC_ZOPC | rsmask_48(d1, b1) | uimm16(i2, 32, 48)); }
inline void Assembler::z_tend() { emit_32( TEND_ZOPC); }
-inline void Assembler::z_tabort( int64_t d2, Register b2) { emit_32( TABORT_ZOPC | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_tabort( int64_t d2, Register b2) { emit_32( TABORT_ZOPC | rsmask_32(d2, b2)); }
inline void Assembler::z_etnd(Register r1) { emit_32( ETND_ZOPC | regt(r1, 24, 32)); }
inline void Assembler::z_ppa(Register r1, Register r2, int64_t m3) { emit_32( PPA_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
//---------------------------------
// Conditional Execution
//---------------------------------
-inline void Assembler::z_locr( Register r1, Register r2, branch_condition cc) { emit_32( LOCR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196
-inline void Assembler::z_locgr( Register r1, Register r2, branch_condition cc) { emit_32( LOCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196
-inline void Assembler::z_loc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOC_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
-inline void Assembler::z_locg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOCG_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
-inline void Assembler::z_loc( Register r1, const Address &a, branch_condition cc) { z_loc(r1, a.disp(), a.base(), cc); }
+inline void Assembler::z_locr( Register r1, Register r2, branch_condition cc) { emit_32( LOCR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196
+inline void Assembler::z_locgr( Register r1, Register r2, branch_condition cc) { emit_32( LOCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196
+inline void Assembler::z_loc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOC_ZOPC | regt(r1, 8, 48) | rsymask_48(d2, b2) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_locg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOCG_ZOPC | regt(r1, 8, 48) | rsymask_48(d2, b2) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_loc( Register r1, const Address &a, branch_condition cc) { z_loc(r1, a.disp(), a.base(), cc); }
inline void Assembler::z_locg( Register r1, const Address &a, branch_condition cc) { z_locg(r1, a.disp(), a.base(), cc); }
-inline void Assembler::z_stoc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOC_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
-inline void Assembler::z_stocg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOCG_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_stoc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOC_ZOPC | regt(r1, 8, 48) | rsymask_48(d2, b2) | uimm4(cc, 12, 48)); } // z196
+inline void Assembler::z_stocg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOCG_ZOPC | regt(r1, 8, 48) | rsymask_48(d2, b2) | uimm4(cc, 12, 48)); } // z196
inline void Assembler::z_srst( Register r1, Register r2) { emit_32( SRST_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_srstu(Register r1, Register r2) { emit_32( SRSTU_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
@@ -173,26 +203,29 @@ inline void Assembler::z_srstu(Register r1, Register r2) { emit_32( SRSTU_ZOPC |
//---------------------------------
// Address calculation
//---------------------------------
-inline void Assembler::z_layz(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | reg(b2, 16, 48)); }
+inline void Assembler::z_layz(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | rxymaskt_48(d2, x2, b2)); }
+inline void Assembler::z_lay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | rxymask_48( d2, x2, b2)); }
+inline void Assembler::z_laz( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 8, 32) | rxmaskt_32( d2, x2, b2)); }
+inline void Assembler::z_la( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
inline void Assembler::z_lay( Register r1, const Address &a) { z_layz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_laz( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | reg(b2, 16, 32)); }
-inline void Assembler::z_la( Register r1, const Address &a) { z_laz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_la( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32));}
+inline void Assembler::z_la( Register r1, const Address &a) { z_laz( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_larl(Register r1, int64_t i2) { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
inline void Assembler::z_larl(Register r1, address a) { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
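// For the RX/RXY address-calculation forms above, the replacements pair a 12-bit
// unsigned displacement with rxmask_32 and a 20-bit signed displacement with
// rxymask_48, e.g. (taken directly from the removed lines):
//
//   rxmask_32(d2, x2, b2)   stands in for  uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)   (LA)
//   rxymask_48(d2, x2, b2)  stands in for  simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48)   (LAY)
//
// A minimal standalone sketch of what such a long-displacement helper presumably
// packs, assuming the standard z/Architecture RXY layout (X2 at bits 12-15, B2 at
// 16-19, DL2 at 20-31, DH2 at 32-39 of a 48-bit instruction). The names field48
// and rxymask_48_model are illustrative only, not the patch's code:

#include <cstdint>

// place 'v' into a 48-bit instruction image; 'pos' counts bits from the left
static inline uint64_t field48(uint64_t v, int pos, int width) {
  return (v & ((1ULL << width) - 1)) << (48 - pos - width);
}

static inline uint64_t rxymask_48_model(int64_t d2, unsigned x2, unsigned b2) {
  uint64_t dl = (uint64_t)d2 & 0xfff;          // DL2: low 12 bits of the displacement
  uint64_t dh = ((uint64_t)d2 >> 12) & 0xff;   // DH2: high 8 bits (carries the sign)
  return field48(x2, 12, 4) | field48(b2, 16, 4) |
         field48(dl, 20, 12) | field48(dh, 32, 8);
}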
-inline void Assembler::z_lr(Register r1, Register r2) { emit_16( LR_ZOPC | regt(r1,8,16) | reg(r2,12,16)); }
-inline void Assembler::z_lgr(Register r1, Register r2) { emit_32( LGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
-inline void Assembler::z_lh(Register r1, int64_t d2, Register x2, Register b2) { emit_32( LH_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_lh(Register r1, const Address &a) { z_lh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_l(Register r1, int64_t d2, Register x2, Register b2) { emit_32( L_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_l(Register r1, const Address &a) { z_l(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_lg(Register r1, const Address &a) { z_lg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-
-inline void Assembler::z_lbr( Register r1, Register r2) { emit_32( LBR_ZOPC | regt(r1, 24, 32) | reg( r2, 28, 32)); }
-inline void Assembler::z_lhr( Register r1, Register r2) { emit_32( LHR_ZOPC | regt(r1, 24, 32) | reg( r2, 28, 32)); }
+//---------------------------------
+// Load/Store
+//---------------------------------
+inline void Assembler::z_lr( Register r1, Register r2) { emit_16( LR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_lgr(Register r1, Register r2) { emit_32( LGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lh( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LH_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_l( Register r1, int64_t d2, Register x2, Register b2) { emit_32( L_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_lg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_lh( Register r1, const Address &a) { z_lh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_l( Register r1, const Address &a) { z_l( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lg( Register r1, const Address &a) { z_lg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_lbr( Register r1, Register r2) { emit_32( LBR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lhr( Register r1, Register r2) { emit_32( LHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_lgbr( Register r1, Register r2) { emit_32( LGBR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_lghr( Register r1, Register r2) { emit_32( LGHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_lgfr( Register r1, Register r2) { emit_32( LGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
@@ -201,30 +234,30 @@ inline void Assembler::z_llgcr(Register r1, Register r2) { emit_32( LLGCR_ZOPC |
inline void Assembler::z_llghr(Register r1, Register r2) { emit_32( LLGHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_llgfr(Register r1, Register r2) { emit_32( LLGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_sth(Register r1, int64_t d2, Register x2, Register b2) { emit_32( STH_ZOPC | reg(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_st( Register r1, int64_t d2, Register x2, Register b2) { emit_32( ST_ZOPC | reg(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_stg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( STG_ZOPC | reg(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_sth(Register r1, const Address &a) { z_sth(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_sth(Register r1, int64_t d2, Register x2, Register b2) { emit_32( STH_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_st( Register r1, const Address& d) { z_st(r1, d.disp(), d.indexOrR0(), d.base()); }
-inline void Assembler::z_st( Register r1, int64_t d2, Register x2, Register b2) { emit_32( ST_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_stg(Register r1, const Address& d) { z_stg(r1, d.disp(), d.indexOrR0(), d.base()); }
-inline void Assembler::z_stg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( STG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_st( Register r1, const Address &a) { z_st( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stg(Register r1, const Address &a) { z_stg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_stcm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( STCM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_stcm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( STCM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | rsmask_32( d2, b2)); }
+inline void Assembler::z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | rsymask_48(d2, b2)); }
// memory-immediate instructions (8-bit immediate)
-inline void Assembler::z_cli( int64_t d1, Register b1, int64_t i2) { emit_32( CLI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | uimm8(i2, 8, 32)); }
-inline void Assembler::z_mvi( int64_t d1, Register b1, int64_t i2) { emit_32( MVI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
-inline void Assembler::z_tm( int64_t d1, Register b1, int64_t i2) { emit_32( TM_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
-inline void Assembler::z_ni( int64_t d1, Register b1, int64_t i2) { emit_32( NI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
-inline void Assembler::z_oi( int64_t d1, Register b1, int64_t i2) { emit_32( OI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
-inline void Assembler::z_xi( int64_t d1, Register b1, int64_t i2) { emit_32( XI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); }
-inline void Assembler::z_cliy(int64_t d1, Register b1, int64_t i2) { emit_48( CLIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | uimm8(i2, 8, 48)); }
-inline void Assembler::z_mviy(int64_t d1, Register b1, int64_t i2) { emit_48( MVIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
-inline void Assembler::z_tmy( int64_t d1, Register b1, int64_t i2) { emit_48( TMY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
-inline void Assembler::z_niy( int64_t d1, Register b1, int64_t i2) { emit_48( NIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
-inline void Assembler::z_oiy( int64_t d1, Register b1, int64_t i2) { emit_48( OIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
-inline void Assembler::z_xiy( int64_t d1, Register b1, int64_t i2) { emit_48( XIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); }
+inline void Assembler::z_cli( int64_t d1, Register b1, int64_t i2) { emit_32( CLI_ZOPC | rsmask_32( d1, b1) | uimm8(i2, 8, 32)); }
+inline void Assembler::z_mvi( int64_t d1, Register b1, int64_t i2) { emit_32( MVI_ZOPC | rsmask_32( d1, b1) | imm8(i2, 8, 32)); }
+inline void Assembler::z_tm( int64_t d1, Register b1, int64_t i2) { emit_32( TM_ZOPC | rsmask_32( d1, b1) | imm8(i2, 8, 32)); }
+inline void Assembler::z_ni( int64_t d1, Register b1, int64_t i2) { emit_32( NI_ZOPC | rsmask_32( d1, b1) | imm8(i2, 8, 32)); }
+inline void Assembler::z_oi( int64_t d1, Register b1, int64_t i2) { emit_32( OI_ZOPC | rsmask_32( d1, b1) | imm8(i2, 8, 32)); }
+inline void Assembler::z_xi( int64_t d1, Register b1, int64_t i2) { emit_32( XI_ZOPC | rsmask_32( d1, b1) | imm8(i2, 8, 32)); }
+inline void Assembler::z_cliy(int64_t d1, Register b1, int64_t i2) { emit_48( CLIY_ZOPC | rsymask_48(d1, b1) | uimm8(i2, 8, 48)); }
+inline void Assembler::z_mviy(int64_t d1, Register b1, int64_t i2) { emit_48( MVIY_ZOPC | rsymask_48(d1, b1) | imm8(i2, 8, 48)); }
+inline void Assembler::z_tmy( int64_t d1, Register b1, int64_t i2) { emit_48( TMY_ZOPC | rsymask_48(d1, b1) | imm8(i2, 8, 48)); }
+inline void Assembler::z_niy( int64_t d1, Register b1, int64_t i2) { emit_48( NIY_ZOPC | rsymask_48(d1, b1) | imm8(i2, 8, 48)); }
+inline void Assembler::z_oiy( int64_t d1, Register b1, int64_t i2) { emit_48( OIY_ZOPC | rsymask_48(d1, b1) | imm8(i2, 8, 48)); }
+inline void Assembler::z_xiy( int64_t d1, Register b1, int64_t i2) { emit_48( XIY_ZOPC | rsymask_48(d1, b1) | imm8(i2, 8, 48)); }
inline void Assembler::z_cli( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI"); z_cli( a.disp12(), a.base(), imm); }
inline void Assembler::z_mvi( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in MVI"); z_mvi( a.disp12(), a.base(), imm); }

@@ -244,61 +277,58 @@ inline void Assembler::z_mvc(const Address& d, const Address& s, int64_t l) {
assert(!d.has_index() && !s.has_index(), "Address operand can not be encoded.");
z_mvc(d.disp(), l-1, d.base(), s.disp(), s.base());
}
-inline void Assembler::z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
-inline void Assembler::z_mvcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( MVCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); }
+inline void Assembler::z_mvcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( MVCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | rsmaskt_32(d2, b2)); }
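// The SS-format instructions (ECTG above, MVC here, and NC/OC/XC further down)
// additionally gain rsmask_SS for their second storage operand. From the removed
// lines it stands in for the base/displacement fields at bit positions 32 and 36:
//
//   rsmask_SS(d2, b2)  stands in for  uimm12(d2, 36, 48) | regz(b2, 32, 48)   (MVC)
//
// i.e. presumably the usual SS-format second operand (B2 at bits 32-35, D2 at
// 36-47); the helper's definition is not part of this hunk, so that reading is
// an assumption.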
-inline void Assembler::z_mvhhi( int64_t d1, Register b1, int64_t i2) { emit_48( MVHHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
-inline void Assembler::z_mvhi ( int64_t d1, Register b1, int64_t i2) { emit_48( MVHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
-inline void Assembler::z_mvghi( int64_t d1, Register b1, int64_t i2) { emit_48( MVGHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvhhi( int64_t d1, Register b1, int64_t i2) { emit_48( MVHHI_ZOPC | rsmask_48( d1, b1) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvhi ( int64_t d1, Register b1, int64_t i2) { emit_48( MVHI_ZOPC | rsmask_48( d1, b1) | simm16(i2, 32, 48)); }
+inline void Assembler::z_mvghi( int64_t d1, Register b1, int64_t i2) { emit_48( MVGHI_ZOPC | rsmask_48( d1, b1) | simm16(i2, 32, 48)); }
inline void Assembler::z_mvhhi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHHI"); z_mvhhi( d.disp(), d.baseOrR0(), i2); }
inline void Assembler::z_mvhi ( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHI"); z_mvhi( d.disp(), d.baseOrR0(), i2); }
inline void Assembler::z_mvghi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVGHI"); z_mvghi( d.disp(), d.baseOrR0(), i2); }
-inline void Assembler::z_ex(Register r1, int64_t d2, Register x2, Register b2) { emit_32( EX_ZOPC | regz(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-
-inline void Assembler::z_ic (Register r1, int64_t d2, Register x2, Register b2) { emit_32( IC_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_icy (Register r1, int64_t d2, Register x2, Register b2) { emit_48( ICY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_icm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( ICM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_icmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
-inline void Assembler::z_icmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); }
+inline void Assembler::z_ic (Register r1, int64_t d2, Register x2, Register b2) { emit_32( IC_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_icy (Register r1, int64_t d2, Register x2, Register b2) { emit_48( ICY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_icm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( ICM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | rsmask_32( d2, b2)); }
+inline void Assembler::z_icmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_icmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | rsymask_48(d2, b2)); }
inline void Assembler::z_iihh(Register r1, int64_t i2) { emit_32( IIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
inline void Assembler::z_iihl(Register r1, int64_t i2) { emit_32( IIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
inline void Assembler::z_iilh(Register r1, int64_t i2) { emit_32( IILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
inline void Assembler::z_iill(Register r1, int64_t i2) { emit_32( IILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
inline void Assembler::z_iihf(Register r1, int64_t i2) { emit_48( IIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
inline void Assembler::z_iilf(Register r1, int64_t i2) { emit_48( IILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
-inline void Assembler::z_lgf(Register r1, const Address& a) { z_lgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_lhy(Register r1, const Address &a) { z_lhy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lhy(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_lgh(Register r1, const Address &a) { z_lgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_lt(Register r1, const Address &a) { z_lt(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lt (Register r1, int64_t d2, Register x2, Register b2) { emit_48( LT_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ltg(Register r1, const Address &a) { z_ltg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ltg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lgf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_lhy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LHY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_lgh( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGH_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_lt ( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LT_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ltg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ltgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_lb ( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LB_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_lgb( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGB_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_llc( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLC_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_llh( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLH_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_llgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_llgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGH_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_llgc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGC_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_lgf( Register r1, const Address &a) { z_lgf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lhy( Register r1, const Address &a) { z_lhy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgh( Register r1, const Address &a) { z_lgh( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lt( Register r1, const Address &a) { z_lt( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ltg( Register r1, const Address &a) { z_ltg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_ltgf(Register r1, const Address &a) { z_ltgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ltgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTGF_ZOPC| regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_lb(Register r1, const Address &a) { z_lb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lb (Register r1, int64_t d2, Register x2, Register b2) { emit_48( LB_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_lgb(Register r1, const Address &a) { z_lgb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lgb(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGB_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ly(Register r1, const Address &a) { z_ly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ly(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_llc(Register r1, const Address& a) { z_llc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_llc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLC_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_llh(Register r1, const Address &a) { z_llh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_llh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_lb( Register r1, const Address &a) { z_lb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_lgb( Register r1, const Address &a) { z_lgb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ly( Register r1, const Address &a) { z_ly( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llc( Register r1, const Address &a) { z_llc( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_llh( Register r1, const Address &a) { z_llh( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_llgf(Register r1, const Address &a) { z_llgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_llgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
inline void Assembler::z_llgh(Register r1, const Address &a) { z_llgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_llgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
inline void Assembler::z_llgc(Register r1, const Address &a) { z_llgc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_llgc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGC_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_llgc(Register r1, int64_t d2, Register b2) { z_llgc( r1, d2, Z_R0, b2); }
-inline void Assembler::z_lhi(Register r1, int64_t i2) { emit_32( LHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
-inline void Assembler::z_lghi(Register r1, int64_t i2) { emit_32( LGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
-inline void Assembler::z_lgfi(Register r1, int64_t i2) { emit_48( LGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_lhi( Register r1, int64_t i2) { emit_32( LHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_lghi( Register r1, int64_t i2) { emit_32( LGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_lgfi( Register r1, int64_t i2) { emit_48( LGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
inline void Assembler::z_llihf(Register r1, int64_t i2) { emit_48( LLIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
inline void Assembler::z_llilf(Register r1, int64_t i2) { emit_48( LLILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
inline void Assembler::z_llihh(Register r1, int64_t i2) { emit_32( LLIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
@@ -323,36 +353,37 @@ inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC |
inline void Assembler::z_ltr( Register r1, Register r2) { emit_16( LTR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
inline void Assembler::z_ltgr( Register r1, Register r2) { emit_32( LTGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_ltgfr(Register r1, Register r2) { emit_32( LTGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
-inline void Assembler::z_stc( Register r1, const Address &a) { z_stc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_stc( Register r1, int64_t d2, Register x2, Register b2) { emit_32( STC_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_stc( Register r1, int64_t d2, Register x2, Register b2) { emit_32( STC_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_stcy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STCY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_sthy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STHY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_sty( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_stc( Register r1, const Address &a) { z_stc( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_stcy( Register r1, const Address &a) { z_stcy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_stcy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STCY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
inline void Assembler::z_sthy( Register r1, const Address &a) { z_sthy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_sthy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_sty( Register r1, const Address &a) { z_sty(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_sty( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_stfle(int64_t d2, Register b2) { emit_32(STFLE_ZOPC | uimm12(d2,20,32) | regz(b2,16,32)); }
+inline void Assembler::z_sty( Register r1, const Address &a) { z_sty( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_stfle(int64_t d2, Register b2) { emit_32(STFLE_ZOPC | rsmask_32(d2, b2)); }
//-----------------------------------
// SHIFT/ROTATE OPERATIONS
//-----------------------------------
-inline void Assembler::z_sla( Register r1, int64_t d2, Register b2) { emit_32( SLA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
-inline void Assembler::z_slak(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
-inline void Assembler::z_slag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
-inline void Assembler::z_sra( Register r1, int64_t d2, Register b2) { emit_32( SRA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
-inline void Assembler::z_srak(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
-inline void Assembler::z_srag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
-inline void Assembler::z_sll( Register r1, int64_t d2, Register b2) { emit_32( SLL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
-inline void Assembler::z_sllk(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
-inline void Assembler::z_sllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
-inline void Assembler::z_srl( Register r1, int64_t d2, Register b2) { emit_32( SRL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
-inline void Assembler::z_srlk(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
-inline void Assembler::z_srlg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_sla( Register r1, int64_t d2, Register b2) { emit_32( SLA_ZOPC | regt(r1, 8, 32) | rsmaskt_32( d2, b2)); }
+inline void Assembler::z_slak(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAK_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_slag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAG_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_sra( Register r1, int64_t d2, Register b2) { emit_32( SRA_ZOPC | regt(r1, 8, 32) | rsmaskt_32( d2, b2)); }
+inline void Assembler::z_srak(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAK_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_srag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAG_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_sll( Register r1, int64_t d2, Register b2) { emit_32( SLL_ZOPC | regt(r1, 8, 32) | rsmaskt_32( d2, b2)); }
+inline void Assembler::z_sllk(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLK_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_sllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLG_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_srl( Register r1, int64_t d2, Register b2) { emit_32( SRL_ZOPC | regt(r1, 8, 32) | rsmaskt_32( d2, b2)); }
+inline void Assembler::z_srlk(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLK_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_srlg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLG_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
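// In the shift group the t-suffixed helpers appear: here they replace operand
// encodings that used reg() rather than regz() for the base register, again taken
// directly from the removed lines (what the trailing "t" actually signifies is not
// visible in this hunk and is left open):
//
//   rsmaskt_32(d2, b2)   stands in for  uimm12(d2, 20, 32) | reg(b2, 16, 32)   (SLA, SRA, SLL, SRL)
//   rsymaskt_48(d2, b2)  stands in for  simm20(d2)         | reg(b2, 16, 48)   (SLAK, SLAG, ... and RLL below)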
// rotate left
-inline void Assembler::z_rll( Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLL_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
-inline void Assembler::z_rllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLLG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+inline void Assembler::z_rll( Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLL_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
+inline void Assembler::z_rllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLLG_ZOPC | regt(r1, 8, 48) | rsymaskt_48(d2, b2) | reg(r3, 12, 48)); }
// Rotate then AND/XOR/OR/insert
inline void Assembler::z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // Rotate then AND selected bits. -- z196
@@ -388,9 +419,9 @@ inline void Assembler::z_risbg( Register r1, Register r2, int64_t spos3, int64_t
//------------------------------
// LOGICAL OPERATIONS
//------------------------------
-inline void Assembler::z_n( Register r1, int64_t d2, Register x2, Register b2) { emit_32( N_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_ny( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ng( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_n( Register r1, int64_t d2, Register x2, Register b2) { emit_32( N_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_ny( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ng( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_n( Register r1, const Address& a) { z_n( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_ny( Register r1, const Address& a) { z_ny(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_ng( Register r1, const Address& a) { z_ng(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -407,9 +438,9 @@ inline void Assembler::z_nill(Register r1, int64_t i2) { emit_32( NILL_ZOPC | re
inline void Assembler::z_nihf(Register r1, int64_t i2) { emit_48( NIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
inline void Assembler::z_nilf(Register r1, int64_t i2) { emit_48( NILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
-inline void Assembler::z_o( Register r1, int64_t d2, Register x2, Register b2) { emit_32( O_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_oy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_og( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_o( Register r1, int64_t d2, Register x2, Register b2) { emit_32( O_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_oy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_og( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_o( Register r1, const Address& a) { z_o( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_oy( Register r1, const Address& a) { z_oy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_og( Register r1, const Address& a) { z_og(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -426,9 +457,9 @@ inline void Assembler::z_oill(Register r1, int64_t i2) { emit_32( OILL_ZOPC | re
inline void Assembler::z_oihf(Register r1, int64_t i2) { emit_48( OIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
inline void Assembler::z_oilf(Register r1, int64_t i2) { emit_48( OILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
-inline void Assembler::z_x( Register r1, int64_t d2, Register x2, Register b2) { emit_32( X_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_xy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_xg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_x( Register r1, int64_t d2, Register x2, Register b2) { emit_32( X_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_xy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_xg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_x( Register r1, const Address& a) { z_x( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_xy( Register r1, const Address& a) { z_xy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_xg( Register r1, const Address& a) { z_xg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -441,9 +472,9 @@ inline void Assembler::z_xgrk(Register r1, Register r2, Register r3) { emit_32(
inline void Assembler::z_xihf(Register r1, int64_t i2) { emit_48( XIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
inline void Assembler::z_xilf(Register r1, int64_t i2) { emit_48( XILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
-inline void Assembler::z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( NC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
-inline void Assembler::z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( OC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
-inline void Assembler::z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( XC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( NC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); }
+inline void Assembler::z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( OC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); }
+inline void Assembler::z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( XC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); }
inline void Assembler::z_nc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_nc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
inline void Assembler::z_oc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_oc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
inline void Assembler::z_xc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_xc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
@@ -452,14 +483,14 @@ inline void Assembler::z_xc(Address dst, int64_t len, Address src2) { assert(!ds
//---------------
// ADD
//---------------
-inline void Assembler::z_a( Register r1, int64_t d2, Register x2, Register b2) { emit_32( A_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_ay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_al( Register r1, int64_t d2, Register x2, Register b2) { emit_32( AL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_aly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ag( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_agf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_alg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_algf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_a( Register r1, int64_t d2, Register x2, Register b2) { emit_32( A_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_ay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_al( Register r1, int64_t d2, Register x2, Register b2) { emit_32( AL_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_aly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ag( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_agf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_alg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_algf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_a( Register r1, const Address& a) { z_a( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_ay( Register r1, const Address& a) { z_ay( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_al( Register r1, const Address& a) { z_al( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -501,10 +532,10 @@ inline void Assembler::z_alhsik( Register r1, Register r3, int64_t i2) { emit_48
inline void Assembler::z_alghsik(Register r1, Register r3, int64_t i2) { emit_48( ALGHSIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
// In-memory arithmetic (add signed, add logical with signed immediate)
-inline void Assembler::z_asi( int64_t d1, Register b1, int64_t i2) { emit_48( ASI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
-inline void Assembler::z_agsi( int64_t d1, Register b1, int64_t i2) { emit_48( AGSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
-inline void Assembler::z_alsi( int64_t d1, Register b1, int64_t i2) { emit_48( ALSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
-inline void Assembler::z_algsi(int64_t d1, Register b1, int64_t i2) { emit_48( ALGSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_asi( int64_t d1, Register b1, int64_t i2) { emit_48( ASI_ZOPC | simm8(i2, 8, 48) | rsymask_48(d1, b1)); }
+inline void Assembler::z_agsi( int64_t d1, Register b1, int64_t i2) { emit_48( AGSI_ZOPC | simm8(i2, 8, 48) | rsymask_48(d1, b1)); }
+inline void Assembler::z_alsi( int64_t d1, Register b1, int64_t i2) { emit_48( ALSI_ZOPC | simm8(i2, 8, 48) | rsymask_48(d1, b1)); }
+inline void Assembler::z_algsi(int64_t d1, Register b1, int64_t i2) { emit_48( ALGSI_ZOPC | simm8(i2, 8, 48) | rsymask_48(d1, b1)); }
inline void Assembler::z_asi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ASI"); z_asi( d.disp(), d.base(), i2); }
inline void Assembler::z_agsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in AGSI"); z_agsi( d.disp(), d.base(), i2); }
inline void Assembler::z_alsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ALSI"); z_alsi( d.disp(), d.base(), i2); }
@@ -514,14 +545,18 @@ inline void Assembler::z_algsi(const Address& d, int64_t i2) { assert(!d.has_ind
//--------------------
// SUBTRACT
//--------------------
-inline void Assembler::z_s( Register r1, int64_t d2, Register x2, Register b2) { emit_32( S_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_sy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_sg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_sgf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_slg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_slgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_s( Register r1, int64_t d2, Register x2, Register b2) { emit_32( S_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_sy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_sl( Register r1, int64_t d2, Register x2, Register b2) { emit_32( SL_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_sly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_sg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_sgf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_slg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_slgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_s( Register r1, const Address& a) { z_s( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_sy( Register r1, const Address& a) { z_sy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sl( Register r1, const Address& a) { z_sl( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sly( Register r1, const Address& a) { z_sly( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_sg( Register r1, const Address& a) { z_sg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_sgf( Register r1, const Address& a) { z_sgf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_slg( Register r1, const Address& a) { z_slg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -533,8 +568,8 @@ inline void Assembler::z_sgfr(Register r1, Register r2) { emit_32( SGFR_ZOPC | r
inline void Assembler::z_srk( Register r1, Register r2, Register r3) { emit_32( SRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
inline void Assembler::z_sgrk(Register r1, Register r2, Register r3) { emit_32( SGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
-inline void Assembler::z_sh( Register r1, int64_t d2, Register x2, Register b2) { emit_32( SH_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_shy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sh( Register r1, int64_t d2, Register x2, Register b2) { emit_32( SH_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_shy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SHY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_sh( Register r1, const Address &a) { z_sh( r1, a.disp(), a.indexOrR0(), a.base()); }
inline void Assembler::z_shy( Register r1, const Address &a) { z_shy(r1, a.disp(), a.indexOrR0(), a.base()); }
@@ -560,12 +595,12 @@ inline void Assembler::z_msgfr(Register r1, Register r2) { emit_32( MSGFR_ZOPC |
inline void Assembler::z_mlr( Register r1, Register r2) { emit_32( MLR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_mlgr( Register r1, Register r2) { emit_32( MLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
-inline void Assembler::z_mhy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_msy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_msg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_msgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ml( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ML_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_mlg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_mhy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MHY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_msy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_msg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_msgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSGF_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ml( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ML_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_mlg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MLG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
inline void Assembler::z_mhy( Register r1, const Address& a) { z_mhy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_msy( Register r1, const Address& a) { z_msy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -583,45 +618,44 @@ inline void Assembler::z_mghi( Register r1, int64_t i2) { emit_32( MGHI_ZOPC |
//------------------
// DIVIDE
//------------------
-inline void Assembler::z_dsgr( Register r1, Register r2) { emit_32( DSGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_dsgr( Register r1, Register r2) { emit_32( DSGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_dsgfr(Register r1, Register r2) { emit_32( DSGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
//-------------------
// COMPARE
//-------------------
-inline void Assembler::z_cr( Register r1, Register r2) { emit_16( CR_ZOPC | reg(r1, 8, 16) | reg(r2,12,16)); }
+inline void Assembler::z_cr( Register r1, Register r2) { emit_16( CR_ZOPC | reg(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_clr( Register r1, Register r2) { emit_16( CLR_ZOPC | reg(r1, 8, 16) | reg(r2, 12, 16)); }
inline void Assembler::z_cgr( Register r1, Register r2) { emit_32( CGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_cgfr(Register r1, Register r2) { emit_32( CGFR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_clgr(Register r1, Register r2) { emit_32( CLGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_chi( Register r1, int64_t i2) { emit_32( CHI_ZOPC | reg(r1, 8, 32) | simm16(i2, 16, 32)); }
inline void Assembler::z_cghi(Register r1, int64_t i2) { emit_32( CGHI_ZOPC | reg(r1, 8, 32) | simm16(i2, 16, 32)); }
inline void Assembler::z_cfi( Register r1, int64_t i2) { emit_48( CFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
inline void Assembler::z_cgfi(Register r1, int64_t i2) { emit_48( CGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
-inline void Assembler::z_ch(Register r1, const Address &a) { z_ch(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ch(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CH_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_c(Register r1, const Address &a) { z_c(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_c(Register r1, int64_t d2, Register x2, Register b2) { emit_32( C_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_cy(Register r1, const Address &a) { z_cy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_cy(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_cy(Register r1, int64_t d2, Register b2) { z_cy(r1, d2, Z_R0, b2); }
-inline void Assembler::z_cg(Register r1, const Address &a) { z_cg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_cg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_clr(Register r1, Register r2) { emit_16( CLR_ZOPC | reg(r1,8,16) | reg(r2,12,16)); }
-inline void Assembler::z_clgr(Register r1, Register r2) { emit_32( CLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
-
-
-inline void Assembler::z_clfi(Register r1, int64_t i2) { emit_48( CLFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_ch( Register r1, int64_t d2, Register x2, Register b2) { emit_32( CH_ZOPC | reg(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_c( Register r1, int64_t d2, Register x2, Register b2) { emit_32( C_ZOPC | reg(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_cy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( CY_ZOPC | reg(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_cg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( CG_ZOPC | reg(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ch( Register r1, const Address &a) { z_ch(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_c( Register r1, const Address &a) { z_c( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cy( Register r1, const Address &a) { z_cy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cg( Register r1, const Address &a) { z_cg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+inline void Assembler::z_clfi( Register r1, int64_t i2) { emit_48( CLFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
inline void Assembler::z_clgfi(Register r1, int64_t i2) { emit_48( CLGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
-inline void Assembler::z_cl(Register r1, const Address &a) { z_cl(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_cl(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CL_ZOPC | regt(r1, 8, 32) | uimm12(d2,20,32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_cly(Register r1, const Address &a) { z_cly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_cly(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_cly(Register r1, int64_t d2, Register b2) { z_cly(r1, d2, Z_R0, b2); }
-inline void Assembler::z_clg(Register r1, const Address &a) { z_clg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_clg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( CLC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
-inline void Assembler::z_clcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( CLCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
-inline void Assembler::z_clclu(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CLCLU_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | uimm12(d2, 20, 48) | reg(b2, 16, 48)); }
+inline void Assembler::z_cl( Register r1, int64_t d2, Register x2, Register b2) { emit_32( CL_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_cly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLY_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_clg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLG_ZOPC | reg( r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_cl( Register r1, const Address &a) { z_cl( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cly( Register r1, const Address &a) { z_cly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_clg( Register r1, const Address &a) { z_clg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( CLC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); }
+inline void Assembler::z_clcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( CLCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | rsmaskt_32( d2, b2)); }
+inline void Assembler::z_clclu(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CLCLU_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | rsymaskt_48(d2, b2)); }
inline void Assembler::z_tmll(Register r1, int64_t i2) { emit_32( TMLL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
inline void Assembler::z_tmlh(Register r1, int64_t i2) { emit_32( TMLH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
@@ -634,86 +668,89 @@ inline void Assembler::z_trot(Register r1, Register r2, int64_t m3) { emit_32( T
inline void Assembler::z_trto(Register r1, Register r2, int64_t m3) { emit_32( TRTO_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
inline void Assembler::z_trtt(Register r1, Register r2, int64_t m3) { emit_32( TRTT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+
+//------------------------
+// COMPARE AND BRANCH/TRAP
+//------------------------
// signed comparison
-inline void Assembler::z_crb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_crj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_cib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | simm8(i2, 32, 48)); }
-inline void Assembler::z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | simm8(i2, 32, 48)); }
-inline void Assembler::z_cij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
-inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_crb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | rsmask_48(d4, b4) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | rsmask_48(d4, b4) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_crj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | rsmask_48(d4, b4) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | rsmask_48(d4, b4) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
// unsigned comparison
-inline void Assembler::z_clrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_clrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CLRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CLGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_clib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm8(i2, 32, 48)); }
-inline void Assembler::z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm8(i2, 32, 48)); }
-inline void Assembler::z_clij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CLIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | rsmask_48(d4, b4) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | rsmask_48(d4, b4) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CLRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CLGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | rsmask_48(d4, b4) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | rsmask_48(d4, b4) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CLIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
inline void Assembler::z_clgij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CLGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
// Compare and trap instructions (signed).
-inline void Assembler::z_crt(Register r1, Register r2, int64_t m3) { emit_32( CRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
-inline void Assembler::z_cgrt(Register r1, Register r2, int64_t m3) { emit_32( CGRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_crt(Register r1, Register r2, int64_t m3) { emit_32( CRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_cgrt(Register r1, Register r2, int64_t m3) { emit_32( CGRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
inline void Assembler::z_cit(Register r1, int64_t i2, int64_t m3) { emit_48( CIT_ZOPC | reg(r1, 8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
inline void Assembler::z_cgit(Register r1, int64_t i2, int64_t m3) { emit_48( CGIT_ZOPC | reg(r1, 8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
// Compare and trap instructions (unsigned).
-inline void Assembler::z_clrt(Register r1, Register r2, int64_t m3) { emit_32( CLRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
-inline void Assembler::z_clgrt(Register r1, Register r2, int64_t m3) { emit_32( CLGRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_clrt(Register r1, Register r2, int64_t m3) { emit_32( CLRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_clgrt(Register r1, Register r2, int64_t m3) { emit_32( CLGRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
inline void Assembler::z_clfit(Register r1, int64_t i2, int64_t m3) { emit_48( CLFIT_ZOPC | reg(r1, 8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
inline void Assembler::z_clgit(Register r1, int64_t i2, int64_t m3) { emit_48( CLGIT_ZOPC | reg(r1, 8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
-inline void Assembler::z_bc( branch_condition m1, int64_t d2, Register x2, Register b2) { emit_32( BC_ZOPC | 0 << 16 | uimm4(m1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_bcr( branch_condition m1, Register r2) { emit_16( BCR_ZOPC | uimm4(m1,8,16) | reg(r2,12,16)); }
-inline void Assembler::z_brc( branch_condition i1, int64_t i2) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(i2, 16, 32)); }
-inline void Assembler::z_brc( branch_condition i1, address a) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
-inline void Assembler::z_brcl(branch_condition i1, address a) { emit_48( BRCL_ZOPC | uimm4(i1, 8, 48)| simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
-inline void Assembler::z_bctgr(Register r1, Register r2) { emit_32( BCTGR_ZOPC | reg( r1, 24, 32) | reg( r2, 28, 32)); };
-inline void Assembler::z_basr(Register r1, Register r2) { emit_16( BASR_ZOPC | regt(r1,8,16) | reg(r2,12,16)); }
-
-inline void Assembler::z_brasl(Register r1, address a) { emit_48( BRASL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
-
-inline void Assembler::z_brct(Register r1, address a) { emit_32( BRCT_ZOPC | regt(r1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
-inline void Assembler::z_brct(Register r1, Label& L) {z_brct(r1, target(L)); }
+//------------------------
+// BRANCH/CALL
+//------------------------
+inline void Assembler::z_bc( branch_condition m1, int64_t d2, Register x2, Register b2) { emit_32( BC_ZOPC | uimm4(m1, 8, 32) | rxmask_32(d2, x2, b2)); }
+inline void Assembler::z_bcr( branch_condition m1, Register r2) { emit_16( BCR_ZOPC | uimm4(m1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_brc( branch_condition i1, int64_t i2) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_brc( branch_condition i1, address a) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
+inline void Assembler::z_brcl(branch_condition i1, address a) { emit_48( BRCL_ZOPC | uimm4(i1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
+inline void Assembler::z_bctgr(Register r1, Register r2) { emit_32( BCTGR_ZOPC | reg( r1, 24, 32) | reg( r2, 28, 32)); };
-inline void Assembler::z_brxh(Register r1, Register r3, address a) {emit_32( BRXH_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32));}
-inline void Assembler::z_brxh(Register r1, Register r3, Label& L) {z_brxh(r1, r3, target(L)); }
+inline void Assembler::z_basr( Register r1, Register r2) { emit_16( BASR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_brasl(Register r1, address a) { emit_48( BRASL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); }
+inline void Assembler::z_brct(Register r1, address a) {emit_32( BRCT_ZOPC | reg(r1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); }
+inline void Assembler::z_brxh(Register r1, Register r3, address a) {emit_32( BRXH_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32));}
inline void Assembler::z_brxle(Register r1, Register r3, address a) {emit_32( BRXLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32));}
-inline void Assembler::z_brxle(Register r1, Register r3, Label& L) {z_brxle(r1, r3, target(L)); }
-
inline void Assembler::z_brxhg(Register r1, Register r3, address a) {emit_48( BRXHG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48));}
-inline void Assembler::z_brxhg(Register r1, Register r3, Label& L) {z_brxhg(r1, r3, target(L)); }
-
inline void Assembler::z_brxlg(Register r1, Register r3, address a) {emit_48( BRXLG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48));}
+inline void Assembler::z_brct(Register r1, Label& L) {z_brct( r1, target(L)); }
+inline void Assembler::z_brxh(Register r1, Register r3, Label& L) {z_brxh( r1, r3, target(L)); }
+inline void Assembler::z_brxle(Register r1, Register r3, Label& L) {z_brxle(r1, r3, target(L)); }
+inline void Assembler::z_brxhg(Register r1, Register r3, Label& L) {z_brxhg(r1, r3, target(L)); }
inline void Assembler::z_brxlg(Register r1, Register r3, Label& L) {z_brxlg(r1, r3, target(L)); }
-inline void Assembler::z_flogr(Register r1, Register r2) { emit_32( FLOGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
-inline void Assembler::z_popcnt(Register r1, Register r2) { emit_32( POPCNT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
-inline void Assembler::z_ahhhr(Register r1, Register r2, Register r3) { emit_32( AHHHR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
-inline void Assembler::z_ahhlr(Register r1, Register r2, Register r3) { emit_32( AHHLR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_flogr( Register r1, Register r2) { emit_32( FLOGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_popcnt(Register r1, Register r2) { emit_32( POPCNT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ahhhr( Register r1, Register r2, Register r3) { emit_32( AHHHR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ahhlr( Register r1, Register r2, Register r3) { emit_32( AHHLR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_tam() { emit_16( TAM_ZOPC); }
-inline void Assembler::z_stckf(int64_t d2, Register b2) { emit_32( STCKF_ZOPC | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_stm( Register r1, Register r3, int64_t d2, Register b2) { emit_32( STM_ZOPC | reg(r1, 8, 32) | reg(r3,12,32)| reg(b2,16,32) | uimm12(d2, 20,32)); }
-inline void Assembler::z_stmy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMY_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); }
-inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMG_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); }
-inline void Assembler::z_lm( Register r1, Register r3, int64_t d2, Register b2) { emit_32( LM_ZOPC | reg(r1, 8, 32) | reg(r3,12,32)| reg(b2,16,32) | uimm12(d2, 20,32)); }
-inline void Assembler::z_lmy( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMY_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); }
-inline void Assembler::z_lmg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMG_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); }
-
-inline void Assembler::z_cs( Register r1, Register r3, int64_t d2, Register b2) { emit_32( CS_ZOPC | regt(r1, 8, 32) | reg(r3, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); }
-inline void Assembler::z_csy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSY_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
-inline void Assembler::z_csg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_stckf(int64_t d2, Register b2) { emit_32( STCKF_ZOPC | rsmask_32(d2, b2)); }
+inline void Assembler::z_stm( Register r1, Register r3, int64_t d2, Register b2) { emit_32( STM_ZOPC | reg(r1, 8, 32) | reg(r3,12,32)| rsmask_32( d2, b2)); }
+inline void Assembler::z_stmy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMY_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| rsymask_48(d2, b2)); }
+inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMG_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| rsymask_48(d2, b2)); }
+inline void Assembler::z_lm( Register r1, Register r3, int64_t d2, Register b2) { emit_32( LM_ZOPC | reg(r1, 8, 32) | reg(r3,12,32)| rsmask_32( d2, b2)); }
+inline void Assembler::z_lmy( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMY_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| rsymask_48(d2, b2)); }
+inline void Assembler::z_lmg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMG_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| rsymask_48(d2, b2)); }
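// Editor's note (illustrative, not part of the patch): likewise, the RS/RSY helpers used
// above appear to stand in for the base+displacement fields the old encodings spelled out:
//
//   rsmask_32(d2, b2)   ~=  uimm12(d2, 20, 32) | regz(b2, 16, 32)
//   rsymask_48(d2, b2)  ~=  simm20(d2)         | regz(b2, 16, 48)
//
// (whether the base register goes through reg() or regz() is an assumption; the old STCKF
// encoding used regz(), while the old STM/LM encodings used reg()).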
+
+inline void Assembler::z_cs( Register r1, Register r3, int64_t d2, Register b2) { emit_32( CS_ZOPC | regt(r1, 8, 32) | reg(r3, 12, 32) | rsmask_32( d2, b2)); }
+inline void Assembler::z_csy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSY_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
+inline void Assembler::z_csg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | rsymask_48(d2, b2)); }
inline void Assembler::z_cs( Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_cs( r1, r3, a.disp(), a.baseOrR0()); }
inline void Assembler::z_csy(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csy(r1, r3, a.disp(), a.baseOrR0()); }
inline void Assembler::z_csg(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csg(r1, r3, a.disp(), a.baseOrR0()); }
-inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CVD_ZOPC | regt(r1, 8, 32) | reg(x2, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); }
-inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+inline void Assembler::z_cvd( Register r1, int64_t d2, Register x2, Register b2) { emit_32( CVD_ZOPC | regt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
//---------------------------
@@ -1142,30 +1179,30 @@ inline void Assembler::z_ler( FloatRegister r1, FloatRegister r2) { emit_16( LE
inline void Assembler::z_ldr( FloatRegister r1, FloatRegister r2) { emit_16( LDR_ZOPC | fregt(r1,8,16) | freg(r2,12,16)); }
inline void Assembler::z_ldebr(FloatRegister r1, FloatRegister r2) { emit_32( LDEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
inline void Assembler::z_ledbr(FloatRegister r1, FloatRegister r2) { emit_32( LEDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
-inline void Assembler::z_le( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LE_ZOPC | fregt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_ley(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LEY_ZOPC | fregt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ld( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LD_ZOPC | fregt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_ldy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LDY_ZOPC | fregt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_le( FloatRegister r1, const Address &a) { z_le( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ley(FloatRegister r1, const Address &a) { z_ley(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ld( FloatRegister r1, const Address &a) { z_ld( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ldy(FloatRegister r1, const Address &a) { z_ldy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_le( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LE_ZOPC | fregt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_ley( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LEY_ZOPC | fregt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ld( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LD_ZOPC | fregt(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_ldy( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LDY_ZOPC | fregt(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_le( FloatRegister r1, const Address &a) { z_le( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ley( FloatRegister r1, const Address &a) { z_ley(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ld( FloatRegister r1, const Address &a) { z_ld( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ldy( FloatRegister r1, const Address &a) { z_ldy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_lzdr(FloatRegister r1) { emit_32( LZDR_ZOPC | fregt(r1, 24, 32)); }
-inline void Assembler::z_lzer(FloatRegister f1) { emit_32( LZER_ZOPC | fregt(f1, 24, 32)); }
+inline void Assembler::z_lzdr( FloatRegister r1) { emit_32( LZDR_ZOPC | fregt(r1, 24, 32)); }
+inline void Assembler::z_lzer( FloatRegister f1) { emit_32( LZER_ZOPC | fregt(f1, 24, 32)); }
//-----------------
// STORE
//-----------------
-inline void Assembler::z_ste( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STE_ZOPC | freg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_stey(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STEY_ZOPC | freg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_std( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STD_ZOPC | freg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
-inline void Assembler::z_stdy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STDY_ZOPC | freg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ste( FloatRegister r1, const Address &a) { z_ste( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_stey(FloatRegister r1, const Address &a) { z_stey(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_std( FloatRegister r1, const Address &a) { z_std( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_stdy(FloatRegister r1, const Address &a) { z_stdy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ste( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STE_ZOPC | freg(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_stey( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STEY_ZOPC | freg(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_std( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STD_ZOPC | freg(r1, 8, 32) | rxmask_32( d2, x2, b2)); }
+inline void Assembler::z_stdy( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STDY_ZOPC | freg(r1, 8, 48) | rxymask_48(d2, x2, b2)); }
+inline void Assembler::z_ste( FloatRegister r1, const Address &a) { z_ste( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stey( FloatRegister r1, const Address &a) { z_stey(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_std( FloatRegister r1, const Address &a) { z_std( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_stdy( FloatRegister r1, const Address &a) { z_stdy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
//---------------
@@ -1173,35 +1210,34 @@ inline void Assembler::z_stdy(FloatRegister r1, const Address &a) { z_stdy(r1, a
//---------------
inline void Assembler::z_aebr( FloatRegister f1, FloatRegister f2) { emit_32( AEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
inline void Assembler::z_adbr( FloatRegister f1, FloatRegister f2) { emit_32( ADBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
-inline void Assembler::z_aeb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( AEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
-inline void Assembler::z_adb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( ADB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
-inline void Assembler::z_aeb( FloatRegister r1, const Address& a) { z_aeb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_adb( FloatRegister r1, const Address& a) { z_adb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_aeb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( AEB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
+inline void Assembler::z_adb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( ADB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
+inline void Assembler::z_aeb( FloatRegister r1, const Address& a) { z_aeb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_adb( FloatRegister r1, const Address& a) { z_adb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
//---------------
// SUB
//---------------
-inline void Assembler::z_sebr( FloatRegister f1, FloatRegister f2) { emit_32( SEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
-inline void Assembler::z_sdbr( FloatRegister f1, FloatRegister f2) { emit_32( SDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
-inline void Assembler::z_seb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( SEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
-inline void Assembler::z_sdb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( SDB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_sebr( FloatRegister f1, FloatRegister f2) { emit_32( SEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_sdbr( FloatRegister f1, FloatRegister f2) { emit_32( SDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_seb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( SEB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
+inline void Assembler::z_sdb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( SDB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
inline void Assembler::z_seb( FloatRegister r1, const Address& a) { z_seb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_sdb( FloatRegister r1, const Address& a) { z_sdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_lcebr(FloatRegister r1, FloatRegister r2) { emit_32( LCEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
inline void Assembler::z_lcdbr(FloatRegister r1, FloatRegister r2) { emit_32( LCDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
-
-inline void Assembler::z_lpdbr( FloatRegister fr1, FloatRegister fr2) { emit_32( LPDBR_ZOPC | fregt( fr1, 24,32) | freg((fr2 == fnoreg) ? fr1:fr2, 28, 32)); }
+inline void Assembler::z_lpdbr(FloatRegister r1, FloatRegister r2) { emit_32( LPDBR_ZOPC | fregt(r1, 24, 32) | freg((r2 == fnoreg) ? r1:r2, 28, 32)); }
//---------------
// MUL
//---------------
-inline void Assembler::z_meebr(FloatRegister f1, FloatRegister f2) { emit_32( MEEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
-inline void Assembler::z_mdbr( FloatRegister f1, FloatRegister f2) { emit_32( MDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
-inline void Assembler::z_meeb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( MEEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
-inline void Assembler::z_mdb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( MDB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_meebr(FloatRegister f1, FloatRegister f2) { emit_32( MEEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_mdbr( FloatRegister f1, FloatRegister f2) { emit_32( MDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_meeb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( MEEB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
+inline void Assembler::z_mdb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( MDB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
inline void Assembler::z_meeb( FloatRegister r1, const Address& a) { z_meeb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_mdb( FloatRegister r1, const Address& a) { z_mdb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -1213,43 +1249,43 @@ inline void Assembler::z_maebr(FloatRegister f1, FloatRegister f3, FloatRegister
inline void Assembler::z_madbr(FloatRegister f1, FloatRegister f3, FloatRegister f2) { emit_32( MADBR_ZOPC | fregt(f1, 16, 32) | freg(f3, 24, 32) | freg(f2, 28, 32) );}
inline void Assembler::z_msebr(FloatRegister f1, FloatRegister f3, FloatRegister f2) { emit_32( MSEBR_ZOPC | fregt(f1, 16, 32) | freg(f3, 24, 32) | freg(f2, 28, 32) );}
inline void Assembler::z_msdbr(FloatRegister f1, FloatRegister f3, FloatRegister f2) { emit_32( MSDBR_ZOPC | fregt(f1, 16, 32) | freg(f3, 24, 32) | freg(f2, 28, 32) );}
-inline void Assembler::z_maeb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MAEB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) );}
-inline void Assembler::z_madb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MADB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) );}
-inline void Assembler::z_mseb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MSEB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) );}
-inline void Assembler::z_msdb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MSDB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48) );}
-inline void Assembler::z_maeb(FloatRegister f1, FloatRegister f3, const Address& a) { z_maeb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_madb(FloatRegister f1, FloatRegister f3, const Address& a) { z_madb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_mseb(FloatRegister f1, FloatRegister f3, const Address& a) { z_mseb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_msdb(FloatRegister f1, FloatRegister f3, const Address& a) { z_msdb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_maeb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MAEB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | rxmask_48(d2, x2, b2) );}
+inline void Assembler::z_madb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MADB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | rxmask_48(d2, x2, b2) );}
+inline void Assembler::z_mseb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MSEB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | rxmask_48(d2, x2, b2) );}
+inline void Assembler::z_msdb(FloatRegister f1, FloatRegister f3, int64_t d2, Register x2, Register b2) { emit_48( MSDB_ZOPC | fregt(f1, 32, 48) | freg(f3, 8, 48) | rxmask_48(d2, x2, b2) );}
+inline void Assembler::z_maeb(FloatRegister f1, FloatRegister f3, const Address& a) { z_maeb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_madb(FloatRegister f1, FloatRegister f3, const Address& a) { z_madb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_mseb(FloatRegister f1, FloatRegister f3, const Address& a) { z_mseb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msdb(FloatRegister f1, FloatRegister f3, const Address& a) { z_msdb(f1, f3, a.disp(), a.indexOrR0(), a.baseOrR0()); }
//---------------
// DIV
//---------------
-inline void Assembler::z_debr( FloatRegister f1, FloatRegister f2) { emit_32( DEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
-inline void Assembler::z_ddbr( FloatRegister f1, FloatRegister f2) { emit_32( DDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
-inline void Assembler::z_deb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( DEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
-inline void Assembler::z_ddb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( DDB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
-inline void Assembler::z_deb( FloatRegister r1, const Address& a) { z_deb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
-inline void Assembler::z_ddb( FloatRegister r1, const Address& a) { z_ddb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_debr( FloatRegister f1, FloatRegister f2) { emit_32( DEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_ddbr( FloatRegister f1, FloatRegister f2) { emit_32( DDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_deb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( DEB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
+inline void Assembler::z_ddb( FloatRegister f1, int64_t d2, Register x2, Register b2) { emit_48( DDB_ZOPC | fregt( f1, 8, 48) | rxmask_48(d2, x2, b2));}
+inline void Assembler::z_deb( FloatRegister r1, const Address& a) { z_deb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ddb( FloatRegister r1, const Address& a) { z_ddb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
//---------------
// square root
//---------------
-inline void Assembler::z_sqdbr(FloatRegister f1, FloatRegister f2) { emit_32(SQDBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); }
-inline void Assembler::z_sqdb( FloatRegister fr1, int64_t d2, Register x2, Register b2 ) { emit_48( SQDB_ZOPC | fregt( fr1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
-inline void Assembler::z_sqdb( FloatRegister fr1, int64_t d2, Register b2) { z_sqdb( fr1, d2, Z_R0, b2);}
+inline void Assembler::z_sqdbr(FloatRegister r1, FloatRegister r2) { emit_32(SQDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_sqdb( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( SQDB_ZOPC | fregt( r1, 8, 48) | rxmask_48(d2, x2, b2));}
+inline void Assembler::z_sqdb( FloatRegister r1, int64_t d2, Register b2) { z_sqdb( r1, d2, Z_R0, b2);}
//---------------
// CMP
//---------------
inline void Assembler::z_cebr(FloatRegister r1, FloatRegister r2) { emit_32( CEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
-inline void Assembler::z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CEB_ZOPC | fregt(r1, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
-inline void Assembler::z_ceb(FloatRegister r1, const Address &a) { z_ceb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_cdbr(FloatRegister r1, FloatRegister r2) { emit_32( CDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
-inline void Assembler::z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CDB_ZOPC | fregt(r1, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CEB_ZOPC | fregt(r1, 8, 48) | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CDB_ZOPC | fregt(r1, 8, 48) | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_ceb(FloatRegister r1, const Address &a) { z_ceb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
inline void Assembler::z_cdb(FloatRegister r1, const Address &a) { z_cdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
@@ -1280,13 +1316,16 @@ inline void Assembler::z_cgdbr(Register r1, FloatRegister r2, RoundingMode m) {
inline void Assembler::z_st(Register r1, int64_t d2, Register b2) { z_st( r1, d2, Z_R0, b2); }
inline void Assembler::z_sty(Register r1, int64_t d2, Register b2) { z_sty( r1, d2, Z_R0, b2); }
inline void Assembler::z_stg(Register r1, int64_t d2, Register b2) { z_stg( r1, d2, Z_R0, b2); }
- inline void Assembler::z_lgf(Register r1, int64_t d2, Register b2) { z_lgf( r1, d2, Z_R0, b2); }
+ inline void Assembler::z_lgb(Register r1, int64_t d2, Register b2) { z_lgb( r1, d2, Z_R0, b2); }
inline void Assembler::z_lgh(Register r1, int64_t d2, Register b2) { z_lgh( r1, d2, Z_R0, b2); }
+ inline void Assembler::z_lgf(Register r1, int64_t d2, Register b2) { z_lgf( r1, d2, Z_R0, b2); }
+ inline void Assembler::z_llgc(Register r1, int64_t d2, Register b2) { z_llgc(r1, d2, Z_R0, b2); }
inline void Assembler::z_llgh(Register r1, int64_t d2, Register b2) { z_llgh(r1, d2, Z_R0, b2); }
inline void Assembler::z_llgf(Register r1, int64_t d2, Register b2) { z_llgf(r1, d2, Z_R0, b2); }
- inline void Assembler::z_lgb(Register r1, int64_t d2, Register b2) { z_lgb( r1, d2, Z_R0, b2); }
inline void Assembler::z_cl( Register r1, int64_t d2, Register b2) { z_cl( r1, d2, Z_R0, b2); }
- inline void Assembler::z_c(Register r1, int64_t d2, Register b2) { z_c( r1, d2, Z_R0, b2); }
+ inline void Assembler::z_cly(Register r1, int64_t d2, Register b2) { z_cly( r1, d2, Z_R0, b2); }
+ inline void Assembler::z_c( Register r1, int64_t d2, Register b2) { z_c( r1, d2, Z_R0, b2); }
+ inline void Assembler::z_cy(Register r1, int64_t d2, Register b2) { z_cy( r1, d2, Z_R0, b2); }
inline void Assembler::z_cg(Register r1, int64_t d2, Register b2) { z_cg( r1, d2, Z_R0, b2); }
inline void Assembler::z_sh(Register r1, int64_t d2, Register b2) { z_sh( r1, d2, Z_R0, b2); }
inline void Assembler::z_shy(Register r1, int64_t d2, Register b2) { z_shy( r1, d2, Z_R0, b2); }
@@ -1346,24 +1385,40 @@ inline void Assembler::z_brc( branch_condition m, Label& L) { z_brc(m, target(L)
inline void Assembler::z_brcl(branch_condition m, Label& L) { z_brcl(m, target(L)); }
-// Instruction must start at passed address.
-// Extra check for illtraps with ID.
-inline unsigned int Assembler::instr_len(unsigned char *instr) {
- switch ((*instr) >> 6) {
+// Instruction len bits must be stored right-justified in argument.
+inline unsigned int Assembler::instr_len(unsigned char len_bits) {
+ assert(len_bits < 4, "bad instruction len %d", len_bits);
+ switch (len_bits) {
case 0: return 2;
case 1: // fallthru
case 2: return 4;
case 3: return 6;
default:
- // Control can't reach here.
- // The switch expression examines just the leftmost two bytes
+ // len_bits contains, right-justified, only the leftmost two bits
// of the main opcode. So the range of values is just [0..3].
- // Having a default clause makes the compiler happy.
ShouldNotReachHere();
return 0;
}
}
+// Instruction must start at passed address.
+inline unsigned int Assembler::instr_len(unsigned char *instr) {
+ return instr_len((unsigned char)((*instr) >> 6));
+}
+
+// Instruction must be stored right-justified in argument.
+inline unsigned int Assembler::instr_len(unsigned long instr) {
+ // An instruction is 2, 4, or 6 bytes in length.
+  // The instruction length is encoded in the leftmost two bits of the instruction.
+ // len = 6: len_bits = [46..47]
+ // len = 4: len_bits = [30..31]
+ // len = 2: len_bits = [14..15]
+ unsigned char len_bits = instr >> (48-2);
+ if (len_bits == 0) len_bits = instr >> (32-2);
+ if (len_bits == 0) len_bits = instr >> (16-2);
+ return instr_len(len_bits);
+}
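// Editor's note (derived from the switch above, illustrative only): the two leftmost
// opcode bits map to the instruction length as
//
//   len_bits 0b00          -> 2 bytes
//   len_bits 0b01 / 0b10   -> 4 bytes
//   len_bits 0b11          -> 6 bytes
//
// A 6-byte instruction right-justified in an unsigned long therefore always has a non-zero
// bit pair at [46..47], and a 4-byte one at [30..31]; that is why the probing cascade above
// can safely fall through to the shorter formats whenever the probed bit pair is zero.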
+
// Move instr at pc right-justified into passed long int.
// Return instr len in bytes as function result.
// Note: 2-byte instr don't really need to be accessed unsigned
@@ -1383,9 +1438,7 @@ inline unsigned int Assembler::get_instruction(unsigned char *pc, unsigned long
*instr = ((unsigned long)(*(unsigned int*) pc)) << 16 |
((unsigned long)*(unsigned short*) (pc + 4)); break;
default:
- // Control can't reach here.
// The length as returned from instr_len() can only be 2, 4, or 6 bytes.
- // Having a default clause makes the compiler happy.
ShouldNotReachHere();
*instr = 0L; // This assignment is there to make gcc8 happy.
break;
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index 1d1f163826e243de60c85a19890b5191a1140bee..efa142332ef9defe0279baad901f38524eaee693 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2019 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -1440,7 +1440,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
}
// result = condition ? opr1 : opr2
-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390");
+
Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual;
switch (condition) {
case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break;
diff --git a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
index 04a6b88052c99cf11b4397061af77f5abd28888a..6fac285f738ace567bf016f244d59e68031db260 100644
--- a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
@@ -823,52 +823,64 @@ unsigned int C2_MacroAssembler::string_inflate_const(Register src, Register dst,
return offset() - block_start;
}
-// Kills src.
-unsigned int C2_MacroAssembler::has_negatives(Register result, Register src, Register cnt,
- Register odd_reg, Register even_reg, Register tmp) {
- int block_start = offset();
- Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
- const Register addr = src, mask = tmp;
-
- BLOCK_COMMENT("has_negatives {");
-
- z_llgfr(Z_R1, cnt); // Number of bytes to read. (Must be a positive simm32.)
- z_llilf(mask, 0x80808080);
- z_lhi(result, 1); // Assume true.
- // Last possible addr for fast loop.
- z_lay(odd_reg, -16, Z_R1, src);
- z_chi(cnt, 16);
- z_brl(Lslow);
-
- // ind1: index, even_reg: index increment, odd_reg: index limit
- z_iihf(mask, 0x80808080);
- z_lghi(even_reg, 16);
-
- bind(Lloop1); // 16 bytes per iteration.
- z_lg(Z_R0, Address(addr));
- z_lg(Z_R1, Address(addr, 8));
- z_ogr(Z_R0, Z_R1);
- z_ngr(Z_R0, mask);
- z_brne(Ldone); // If found return 1.
- z_brxlg(addr, even_reg, Lloop1);
-
- bind(Lslow);
- z_aghi(odd_reg, 16-1); // Last possible addr for slow loop.
- z_lghi(even_reg, 1);
- z_cgr(addr, odd_reg);
- z_brh(Lnotfound);
-
- bind(Lloop2); // 1 byte per iteration.
- z_cli(Address(addr), 0x80);
- z_brnl(Ldone); // If found return 1.
- z_brxlg(addr, even_reg, Lloop2);
-
- bind(Lnotfound);
- z_lhi(result, 0);
-
- bind(Ldone);
-
- BLOCK_COMMENT("} has_negatives");
+// Returns the number of non-negative bytes (aka US-ASCII characters) found
+// before the first negative byte is encountered.
+unsigned int C2_MacroAssembler::count_positives(Register result, Register src, Register cnt, Register tmp) {
+ const unsigned int block_start = offset();
+ const unsigned int byte_mask = 0x80;
+ const unsigned int twobyte_mask = byte_mask<<8 | byte_mask;
+ const unsigned int unroll_factor = 16;
+ const unsigned int log_unroll_factor = exact_log2(unroll_factor);
+ Register pos = src; // current position in src array, restored at end
+ Register ctr = result; // loop counter, result value
+ Register mask = tmp; // holds the sign detection mask
+ Label unrolledLoop, unrolledDone, byteLoop, allDone;
+
+ assert_different_registers(result, src, cnt, tmp);
+
+ BLOCK_COMMENT("count_positives {");
+
+ lgr_if_needed(pos, src); // current position in src array
+ z_srak(ctr, cnt, log_unroll_factor); // # iterations of unrolled loop
+ z_brnh(unrolledDone); // array too short for unrolled loop
+
+ z_iilf(mask, twobyte_mask<<16 | twobyte_mask);
+ z_iihf(mask, twobyte_mask<<16 | twobyte_mask);
+
+ bind(unrolledLoop);
+ z_lmg(Z_R0, Z_R1, 0, pos);
+ z_ogr(Z_R0, Z_R1);
+ z_ngr(Z_R0, mask);
+ z_brne(unrolledDone); // There is a negative byte somewhere.
+ // ctr and pos are not updated yet ->
+ // delegate finding correct pos to byteLoop.
+ add2reg(pos, unroll_factor);
+ z_brct(ctr, unrolledLoop);
+
+ // Once we arrive here, we have to examine at most (unroll_factor - 1) bytes more.
+ // We then either have reached the end of the array or we hit a negative byte.
+ bind(unrolledDone);
+ z_sll(ctr, log_unroll_factor); // calculate # bytes not processed by unrolled loop
+ // > 0 only if a negative byte was found
+ z_lr(Z_R0, cnt); // calculate remainder bytes
+ z_nilf(Z_R0, unroll_factor - 1);
+ z_ar(ctr, Z_R0); // remaining bytes
+ z_brnh(allDone); // shortcut if nothing left to do
+
+ bind(byteLoop);
+  z_cli(0, pos, byte_mask);           // unsigned comparison! byte@pos must be smaller than byte_mask
+ z_brnl(allDone); // negative byte found.
+
+ add2reg(pos, 1);
+ z_brct(ctr, byteLoop);
+
+ bind(allDone);
+
+ z_srk(ctr, cnt, ctr); // # bytes actually processed (= cnt or index of first negative byte)
+ z_sgfr(pos, ctr); // restore src
+ z_lgfr(result, ctr); // unnecessary. Only there to be sure the high word has a defined state.
+
+ BLOCK_COMMENT("} count_positives");
return offset() - block_start;
}
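// Editor's sketch (assumption, not part of the patch): a plain C++ reference for what the
// stub above computes -- the number of leading bytes whose sign bit is clear:
//
//   static unsigned int count_positives_ref(const unsigned char* src, unsigned int cnt) {
//     unsigned int i = 0;
//     while (i < cnt && src[i] < 0x80) {  // 0x80 == byte_mask: stop at the first negative byte
//       i++;
//     }
//     return i;                           // == cnt when the array is all US-ASCII
//   }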
diff --git a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp
index a6c9865649522d807d91c3255daa05e9c04ad865..a502e41ee08ee12ca3f4af48dbfe7b37a59a5b4f 100644
--- a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp
@@ -57,9 +57,7 @@
// len is signed int. Counts # characters, not bytes.
unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len);
- // Kills src.
- unsigned int has_negatives(Register result, Register src, Register cnt,
- Register odd_reg, Register even_reg, Register tmp);
+ unsigned int count_positives(Register result, Register src, Register cnt, Register tmp);
unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
Register odd_reg, Register even_reg, Register result, int ae);
diff --git a/src/hotspot/cpu/s390/frame_s390.inline.hpp b/src/hotspot/cpu/s390/frame_s390.inline.hpp
index d8a4395d8cad82800bda84422c6621b6c339c3f3..5574e6384e2218d99dfc7d26a6b6f303e193230c 100644
--- a/src/hotspot/cpu/s390/frame_s390.inline.hpp
+++ b/src/hotspot/cpu/s390/frame_s390.inline.hpp
@@ -155,6 +155,10 @@ inline intptr_t* frame::link() const {
return (intptr_t*) callers_abi()->callers_sp;
}
+inline intptr_t* frame::link_or_null() const {
+ return link();
+}
+
inline intptr_t** frame::interpreter_frame_locals_addr() const {
return (intptr_t**) &(ijava_state()->locals);
}
diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp
index ac55bd84dff10a5f79b3ae1376c31a37e0351d21..5c56ec5373b7d751e98c614ac6c3b192eb34fecd 100644
--- a/src/hotspot/cpu/s390/matcher_s390.hpp
+++ b/src/hotspot/cpu/s390/matcher_s390.hpp
@@ -153,4 +153,9 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = true;
+ // Returns pre-selection estimated cost of a vector operation.
+ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
+ return 0;
+ }
+
#endif // CPU_S390_MATCHER_S390_HPP
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index 74ad8ef40d31cfd15d7b7091e290487e5e2e6785..f603c5fa00a44caa0c29141bd253de372b2e76b1 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -697,27 +697,18 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
}
static inline void z_emit16(CodeBuffer &cbuf, long value) {
- // 32bit instructions may become sign extended.
- assert(value >= 0, "unintended sign extension (int->long)");
- assert(value < (1L << 16), "instruction too large");
- *((unsigned short*)(cbuf.insts_end())) = (unsigned short)value;
- cbuf.set_insts_end(cbuf.insts_end() + sizeof(unsigned short));
+ C2_MacroAssembler _masm(&cbuf);
+ __ emit_instruction((unsigned long)value, 2);
}
static inline void z_emit32(CodeBuffer &cbuf, long value) {
- // 32bit instructions may become sign extended.
- assert(value < (1L << 32), "instruction too large");
- *((unsigned int*)(cbuf.insts_end())) = (unsigned int)value;
- cbuf.set_insts_end(cbuf.insts_end() + sizeof(unsigned int));
+ C2_MacroAssembler _masm(&cbuf);
+ __ emit_instruction((unsigned long)value, 4);
}
static inline void z_emit48(CodeBuffer &cbuf, long value) {
- // 32bit instructions may become sign extended.
- assert(value >= 0, "unintended sign extension (int->long)");
- assert(value < (1L << 48), "instruction too large");
- value = value<<16;
- memcpy(cbuf.insts_end(), (unsigned char*)&value, 6);
- cbuf.set_insts_end(cbuf.insts_end() + 6);
+ C2_MacroAssembler _masm(&cbuf);
+ __ emit_instruction((unsigned long)value, 6);
}
static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {
@@ -726,19 +717,9 @@ static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {
value = (long)((unsigned long)((unsigned int)value));
}
- if (value < (1L << 16)) { // 2-byte instruction
- z_emit16(cbuf, value);
- return 2;
- }
-
- if (value < (1L << 32)) { // 4-byte instruction, might be unaligned store
- z_emit32(cbuf, value);
- return 4;
- }
-
- // 6-byte instruction, probably unaligned store.
- z_emit48(cbuf, value);
- return 6;
+ C2_MacroAssembler _masm(&cbuf);
+ int len = __ emit_instruction((unsigned long)value, 0);
+ return len;
}
// Check effective address (at runtime) for required alignment.
@@ -10273,14 +10254,13 @@ instruct string_inflate_const(Universe dummy, iRegP src, iRegP dst, iRegI tmp, i
%}
// StringCoding.java intrinsics
-instruct has_negatives(rarg5RegP ary1, iRegI len, iRegI result, roddRegI oddReg, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
- match(Set result (HasNegatives ary1 len));
- effect(TEMP_DEF result, USE_KILL ary1, TEMP oddReg, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+instruct count_positives(iRegP ary1, iRegI len, iRegI result, iRegI tmp, flagsReg cr) %{
+ match(Set result (CountPositives ary1 len));
+ effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too.
ins_cost(300);
- format %{ "has negatives byte[] $ary1($len) -> $result" %}
+ format %{ "count positives byte[] $ary1($len) -> $result" %}
ins_encode %{
- __ has_negatives($result$$Register, $ary1$$Register, $len$$Register,
- $oddReg$$Register, $evenReg$$Register, $tmp$$Register);
+ __ count_positives($result$$Register, $ary1$$Register, $len$$Register, $tmp$$Register);
%}
ins_pipe(pipe_class_dummy);
%}
diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
index 102200fc08f3537c894a88e91b20b0a2d511e9b3..21cf227067b9d3619ccfbf0ad67ca9261f32d8f0 100644
--- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
@@ -2419,9 +2419,9 @@ class StubGenerator: public StubCodeGenerator {
// Put extra information in the stub code, to make it more readable.
// Write the high part of the address.
// [RGV] Check if there is a dependency on the size of this prolog.
- __ emit_32((intptr_t)cdesc >> 32);
- __ emit_32((intptr_t)cdesc);
- __ emit_32(++_stub_count);
+ __ emit_data((intptr_t)cdesc >> 32);
+ __ emit_data((intptr_t)cdesc);
+ __ emit_data(++_stub_count);
#endif
align(true);
}
@@ -2435,7 +2435,7 @@ class StubGenerator: public StubCodeGenerator {
if (at_header) {
while ((intptr_t)(__ pc()) % icache_line_size != 0) {
- __ emit_16(0);
+ __ z_illtrap();
}
} else {
while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index e9652fa04b24f55e4ed48a4239f860db14aee3b9..fbf95ae02b53ba14803595dd93db0235cba11b00 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -300,12 +300,24 @@ void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
assert(isByte(op1) && isByte(op2), "wrong opcode");
- assert((op1 & 0x01) == 1, "should be 32bit operation");
- assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
+ assert(op1 == 0x81, "Unexpected opcode");
if (is8bit(imm32)) {
emit_int24(op1 | 0x02, // set sign bit
op2 | encode(dst),
imm32 & 0xFF);
+ } else if (dst == rax) {
+ switch (op2) {
+ case 0xD0: emit_int8(0x15); break; // adc
+ case 0xC0: emit_int8(0x05); break; // add
+ case 0xE0: emit_int8(0x25); break; // and
+ case 0xF8: emit_int8(0x3D); break; // cmp
+ case 0xC8: emit_int8(0x0D); break; // or
+ case 0xD8: emit_int8(0x1D); break; // sbb
+ case 0xE8: emit_int8(0x2D); break; // sub
+ case 0xF0: emit_int8(0x35); break; // xor
+ default: ShouldNotReachHere();
+ }
+ emit_int32(imm32);
} else {
emit_int16(op1, (op2 | encode(dst)));
emit_int32(imm32);
@@ -929,6 +941,16 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
tail_size = 1;
break;
+ case 0x15: // adc rax, #32
+ case 0x05: // add rax, #32
+ case 0x25: // and rax, #32
+ case 0x3D: // cmp rax, #32
+ case 0x0D: // or rax, #32
+ case 0x1D: // sbb rax, #32
+ case 0x2D: // sub rax, #32
+ case 0x35: // xor rax, #32
+ return which == end_pc_operand ? ip + 4 : ip;
+
case 0x9B:
switch (0xFF & *ip++) {
case 0xD9: // fnstcw a
@@ -954,6 +976,11 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
debug_only(has_disp32 = true); // has both kinds of operands!
break;
+ case 0xA8: // testb rax, #8
+ return which == end_pc_operand ? ip + 1 : ip;
+ case 0xA9: // testl/testq rax, #32
+ return which == end_pc_operand ? ip + 4 : ip;
+
case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
case 0xC6: // movb a, #8
case 0x80: // cmpb a, #8
@@ -1683,12 +1710,6 @@ void Assembler::cmpl(Address dst, int32_t imm32) {
emit_int32(imm32);
}
-void Assembler::cmp(Register dst, int32_t imm32) {
- prefix(dst);
- emit_int8((unsigned char)0x3D);
- emit_int32(imm32);
-}
-
void Assembler::cmpl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xF8, dst, imm32);
@@ -1974,6 +1995,13 @@ void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
emit_int16(0x2C, (0xC0 | encode));
}
+void Assembler::cvtss2sil(Register dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x2D, (0xC0 | encode));
+}
+
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -2067,6 +2095,21 @@ void Assembler::vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x5B, (0xC0 | encode));
}
+void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5B, (0xC0 | encode));
+}
+
+void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x7B, (0xC0 | encode));
+}
+
void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -2389,10 +2432,7 @@ void Assembler::ldmxcsr( Address src) {
void Assembler::leal(Register dst, Address src) {
InstructionMark im(this);
-#ifdef _LP64
- emit_int8(0x67); // addr32
prefix(src, dst);
-#endif // LP64
emit_int8((unsigned char)0x8D);
emit_operand(dst, src);
}
@@ -5775,8 +5815,13 @@ void Assembler::subss(XMMRegister dst, Address src) {
void Assembler::testb(Register dst, int imm8) {
NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
- (void) prefix_and_encode(dst->encoding(), true);
- emit_arith_b(0xF6, 0xC0, dst, imm8);
+ if (dst == rax) {
+ emit_int8((unsigned char)0xA8);
+ emit_int8(imm8);
+ } else {
+ (void) prefix_and_encode(dst->encoding(), true);
+ emit_arith_b(0xF6, 0xC0, dst, imm8);
+ }
}
void Assembler::testb(Address dst, int imm8) {
@@ -5787,14 +5832,34 @@ void Assembler::testb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::testl(Address dst, int32_t imm32) {
+ if (imm32 >= 0 && is8bit(imm32)) {
+ testb(dst, imm32);
+ return;
+ }
+ InstructionMark im(this);
+ emit_int8((unsigned char)0xF7);
+ emit_operand(as_Register(0), dst);
+ emit_int32(imm32);
+}
+
void Assembler::testl(Register dst, int32_t imm32) {
+ if (imm32 >= 0 && is8bit(imm32) && dst->has_byte_register()) {
+ testb(dst, imm32);
+ return;
+ }
// not using emit_arith because test
// doesn't support sign-extension of
// 8bit operands
- int encode = dst->encoding();
- encode = prefix_and_encode(encode);
- emit_int16((unsigned char)0xF7, (0xC0 | encode));
- emit_int32(imm32);
+ if (dst == rax) {
+ emit_int8((unsigned char)0xA9);
+ emit_int32(imm32);
+ } else {
+ int encode = dst->encoding();
+ encode = prefix_and_encode(encode);
+ emit_int16((unsigned char)0xF7, (0xC0 | encode));
+ emit_int32(imm32);
+ }
}
void Assembler::testl(Register dst, Register src) {
@@ -6450,7 +6515,6 @@ void Assembler::vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vec
emit_int8((rmode));
}
-
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -8317,8 +8381,28 @@ void Assembler::vpbroadcastw(XMMRegister dst, Address src, int vector_len) {
emit_operand(dst, src);
}
-// xmm/mem sourced byte/word/dword/qword replicate
+void Assembler::vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xF6, (0xC0 | encode));
+}
+
+void Assembler::vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x6A, (0xC0 | encode));
+}
+
+void Assembler::vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x62, (0xC0 | encode));
+}
+
+// xmm/mem sourced byte/word/dword/qword replicate
void Assembler::evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
@@ -9864,12 +9948,12 @@ void Assembler::vpbroadcastq(XMMRegister dst, Address src, int vector_len) {
void Assembler::evbroadcasti32x4(XMMRegister dst, Address src, int vector_len) {
assert(vector_len != Assembler::AVX_128bit, "");
- assert(VM_Version::supports_avx512dq(), "");
+ assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
- attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
// swap src<->dst for encoding
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x5A);
@@ -12222,6 +12306,13 @@ void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
emit_int16(0x2C, (0xC0 | encode));
}
+void Assembler::cvtsd2siq(Register dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x2D, (0xC0 | encode));
+}
+
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -12993,6 +13084,10 @@ void Assembler::subq(Register dst, Register src) {
}
void Assembler::testq(Address dst, int32_t imm32) {
+ if (imm32 >= 0) {
+ testl(dst, imm32);
+ return;
+ }
InstructionMark im(this);
emit_int16(get_prefixq(dst), (unsigned char)0xF7);
emit_operand(as_Register(0), dst);
@@ -13000,13 +13095,23 @@ void Assembler::testq(Address dst, int32_t imm32) {
}
void Assembler::testq(Register dst, int32_t imm32) {
+ if (imm32 >= 0) {
+ testl(dst, imm32);
+ return;
+ }
// not using emit_arith because test
// doesn't support sign-extension of
// 8bit operands
- int encode = dst->encoding();
- encode = prefixq_and_encode(encode);
- emit_int16((unsigned char)0xF7, (0xC0 | encode));
- emit_int32(imm32);
+ if (dst == rax) {
+ prefix(REX_W);
+ emit_int8((unsigned char)0xA9);
+ emit_int32(imm32);
+ } else {
+ int encode = dst->encoding();
+ encode = prefixq_and_encode(encode);
+ emit_int16((unsigned char)0xF7, (0xC0 | encode));
+ emit_int32(imm32);
+ }
}
void Assembler::testq(Register dst, Register src) {
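A rough illustration of the encoding savings enabled by the rax and small-immediate special cases added above (byte counts are the usual non-REX encodings; this is editorial commentary and illustrative usage, not part of the patch):

    // Inside some MacroAssembler emission context (illustrative usage only):
    __ testl(rax, 0x12345678);  // short form A9 imm32 (5 bytes) instead of F7 /0 imm32 (6 bytes)
    __ testl(rcx, 0x08);        // non-negative 8-bit mask narrows to testb -> F6 /0 imm8 (3 bytes)
    __ testq(rdx, 0x40);        // non-negative imm32 forwards to testl, then narrows to testb as well
    __ cmpl(rax, 0x12345678);   // emit_arith now picks the 1-byte 3D opcode for rax with a full imm32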
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index f21dd901c5d8d237df8b1d1ab37d896a38a30e37..6af93b52fc639153ab8baf26f14f1a447ae727b7 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1081,15 +1081,12 @@ private:
void cmpb(Address dst, int imm8);
void cmpl(Address dst, int32_t imm32);
-
- void cmp(Register dst, int32_t imm32);
void cmpl(Register dst, int32_t imm32);
void cmpl(Register dst, Register src);
void cmpl(Register dst, Address src);
void cmpq(Address dst, int32_t imm32);
void cmpq(Address dst, Register src);
-
void cmpq(Register dst, int32_t imm32);
void cmpq(Register dst, Register src);
void cmpq(Register dst, Address src);
@@ -1152,6 +1149,7 @@ private:
void cvtss2sd(XMMRegister dst, Address src);
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
+ void cvtsd2siq(Register dst, XMMRegister src);
void cvttsd2sil(Register dst, Address src);
void cvttsd2sil(Register dst, XMMRegister src);
void cvttsd2siq(Register dst, Address src);
@@ -1160,6 +1158,7 @@ private:
// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
+ void cvtss2sil(Register dst, XMMRegister src);
// Convert vector double to int
void cvttpd2dq(XMMRegister dst, XMMRegister src);
@@ -1169,6 +1168,7 @@ private:
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert vector float and int
+ void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
// Convert vector long to vector FP
@@ -1176,6 +1176,7 @@ private:
void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Convert vector double to long
+ void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
// Evex casts with truncation
@@ -1933,10 +1934,17 @@ private:
// Interleave Low Doublewords
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, Address src);
+ void vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
+ // Interleave High Doublewords
+ void vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Interleave Low Quadwords
void punpcklqdq(XMMRegister dst, XMMRegister src);
+ // Vector sum of absolute difference.
+ void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
#ifndef _LP64 // no 32bit push/pop on amd64
void pushl(Address src);
#endif
@@ -2092,9 +2100,10 @@ private:
void subss(XMMRegister dst, Address src);
void subss(XMMRegister dst, XMMRegister src);
- void testb(Register dst, int imm8);
void testb(Address dst, int imm8);
+ void testb(Register dst, int imm8);
+ void testl(Address dst, int32_t imm32);
void testl(Register dst, int32_t imm32);
void testl(Register dst, Register src);
void testl(Register dst, Address src);
diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp
index a49e2f39a83ad98eacfccd4469aead3ef1d4162f..a9739b1a22b6170adbb35ab38570fe22da3153dd 100644
--- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp
+++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp
@@ -152,14 +152,8 @@
return range;
}
- static int get_num_caller_save_xmms(void) {
- int num_caller_save_xmm_regs = nof_caller_save_xmm_regs;
-#ifdef _LP64
- if (UseAVX < 3) {
- num_caller_save_xmm_regs = num_caller_save_xmm_regs / 2;
- }
-#endif
- return num_caller_save_xmm_regs;
+ static int get_num_caller_save_xmms() {
+ return XMMRegisterImpl::available_xmm_registers();
}
static int nof_caller_save_cpu_regs() { return adjust_reg_range(pd_nof_caller_save_cpu_regs_frame_map); }
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index eaeeae235f0d2be6ff8f2adface509258fa9e777..9619a7711e9e811ca3c48c864708c701ef0ff91d 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2010,7 +2010,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
}
}
-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86");
+
Assembler::Condition acond, ncond;
switch (condition) {
case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break;
diff --git a/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp b/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp
index 8051406617501a49e0c2a1d7512850d62b4a489e..05c8bbdde5fc10a5b15393d67ae73fdd4270eb2f 100644
--- a/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp
+++ b/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp
@@ -101,12 +101,7 @@ inline void LinearScan::pd_add_temps(LIR_Op* op) {
// Implementation of LinearScanWalker
inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
- int last_xmm_reg = pd_last_xmm_reg;
-#ifdef _LP64
- if (UseAVX < 3) {
- last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1;
- }
-#endif
+ int last_xmm_reg = pd_first_xmm_reg + XMMRegisterImpl::available_xmm_registers() - 1;
if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::byte_reg)) {
assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
_first_reg = pd_first_byte_reg;
diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
index 595e78dc25753a59080df6160514430e8c922925..1bb767994b8524c064c7b8fbc46a1769cada7d79 100644
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "c1/c1_Defs.hpp"
+#include "c1/c1_FrameMap.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "ci/ciUtilities.hpp"
@@ -369,12 +370,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
map->set_callee_saved(VMRegImpl::stack2reg(r15H_off + num_rt_args), r15->as_VMReg()->next());
#endif // _LP64
- int xmm_bypass_limit = FrameMap::nof_xmm_regs;
-#ifdef _LP64
- if (UseAVX < 3) {
- xmm_bypass_limit = xmm_bypass_limit / 2;
- }
-#endif
+ int xmm_bypass_limit = FrameMap::get_num_caller_save_xmms();
if (save_fpu_registers) {
#ifndef _LP64
@@ -487,13 +483,8 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers)
// so always save them as doubles.
// note that float values are _not_ converted automatically, so for float values
// the second word contains only garbage data.
- int xmm_bypass_limit = FrameMap::nof_xmm_regs;
+ int xmm_bypass_limit = FrameMap::get_num_caller_save_xmms();
int offset = 0;
-#ifdef _LP64
- if (UseAVX < 3) {
- xmm_bypass_limit = xmm_bypass_limit / 2;
- }
-#endif
for (int n = 0; n < xmm_bypass_limit; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
@@ -513,10 +504,7 @@ static void restore_fpu(C1_MacroAssembler* sasm, bool restore_fpu_registers) {
#ifdef _LP64
if (restore_fpu_registers) {
// restore XMM registers
- int xmm_bypass_limit = FrameMap::nof_xmm_regs;
- if (UseAVX < 3) {
- xmm_bypass_limit = xmm_bypass_limit / 2;
- }
+ int xmm_bypass_limit = FrameMap::get_num_caller_save_xmms();
int offset = 0;
for (int n = 0; n < xmm_bypass_limit; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index 400bcec45e2a01ac946c2e84f95e0e56cb6efd77..c4411be23cf76afc3a0cd4d4e1c1052154086b60 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -3374,18 +3374,19 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
}
// Search for Non-ASCII character (Negative byte value) in a byte array,
-// return true if it has any and false otherwise.
+// return the index of the first such character, otherwise the length
+// of the array segment searched.
// ..\jdk\src\java.base\share\classes\java\lang\StringCoding.java
// @IntrinsicCandidate
-// private static boolean hasNegatives(byte[] ba, int off, int len) {
+// public static int countPositives(byte[] ba, int off, int len) {
// for (int i = off; i < off + len; i++) {
// if (ba[i] < 0) {
-// return true;
+// return i - off;
// }
// }
-// return false;
+// return len;
// }
-void C2_MacroAssembler::has_negatives(Register ary1, Register len,
+void C2_MacroAssembler::count_positives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
// rsi: byte array
@@ -3394,17 +3395,18 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
ShortBranchVerifier sbv(this);
assert_different_registers(ary1, len, result, tmp1);
assert_different_registers(vec1, vec2);
- Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
+ Label ADJUST, TAIL_ADJUST, DONE, TAIL_START, CHAR_ADJUST, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
+ movl(result, len); // copy
// len == 0
testl(len, len);
- jcc(Assembler::zero, FALSE_LABEL);
+ jcc(Assembler::zero, DONE);
if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
VM_Version::supports_avx512vlbw() &&
VM_Version::supports_bmi2()) {
- Label test_64_loop, test_tail;
+ Label test_64_loop, test_tail, BREAK_LOOP;
Register tmp3_aliased = len;
movl(tmp1, len);
@@ -3421,16 +3423,15 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
// Check whether our 64 elements of size byte contain negatives
evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
kortestql(mask1, mask1);
- jcc(Assembler::notZero, TRUE_LABEL);
+ jcc(Assembler::notZero, BREAK_LOOP);
addptr(len, 64);
jccb(Assembler::notZero, test_64_loop);
-
bind(test_tail);
// bail out when there is nothing to be done
testl(tmp1, -1);
- jcc(Assembler::zero, FALSE_LABEL);
+ jcc(Assembler::zero, DONE);
// ~(~0 << len) applied up to two times (for 32-bit scenario)
#ifdef _LP64
@@ -3467,21 +3468,30 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
#endif
evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(mask1, mask2);
- jcc(Assembler::notZero, TRUE_LABEL);
+ jcc(Assembler::zero, DONE);
- jmp(FALSE_LABEL);
+ bind(BREAK_LOOP);
+ // At least one byte in the last 64 bytes is negative.
+ // Set up to look at the last 64 bytes as if they were a tail
+ lea(ary1, Address(ary1, len, Address::times_1));
+ addptr(result, len);
+ // Ignore the very last byte: if all others are positive,
+ // it must be negative, so we can skip right to the 2+1 byte
+ // end comparison at this point
+ orl(result, 63);
+ movl(len, 63);
+ // Fallthru to tail compare
} else {
- movl(result, len); // copy
if (UseAVX >= 2 && UseSSE >= 2) {
// With AVX2, use 32-byte vector compare
- Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+ Label COMPARE_WIDE_VECTORS, BREAK_LOOP;
// Compare 32-byte vectors
- andl(result, 0x0000001f); // tail count (in bytes)
- andl(len, 0xffffffe0); // vector count (in bytes)
- jccb(Assembler::zero, COMPARE_TAIL);
+ testl(len, 0xffffffe0); // vector count (in bytes)
+ jccb(Assembler::zero, TAIL_START);
+ andl(len, 0xffffffe0);
lea(ary1, Address(ary1, len, Address::times_1));
negptr(len);
@@ -3492,30 +3502,42 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
bind(COMPARE_WIDE_VECTORS);
vmovdqu(vec1, Address(ary1, len, Address::times_1));
vptest(vec1, vec2);
- jccb(Assembler::notZero, TRUE_LABEL);
+ jccb(Assembler::notZero, BREAK_LOOP);
addptr(len, 32);
- jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+ jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
- testl(result, result);
- jccb(Assembler::zero, FALSE_LABEL);
+ testl(result, 0x0000001f); // any bytes remaining?
+ jcc(Assembler::zero, DONE);
- vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
+ // Quick test using the already prepared vector mask
+ movl(len, result);
+ andl(len, 0x0000001f);
+ vmovdqu(vec1, Address(ary1, len, Address::times_1, -32));
vptest(vec1, vec2);
- jccb(Assembler::notZero, TRUE_LABEL);
- jmpb(FALSE_LABEL);
+ jcc(Assembler::zero, DONE);
+ // There are negative bytes, jump to the tail to determine exactly where
+ jmpb(TAIL_START);
- bind(COMPARE_TAIL); // len is zero
- movl(len, result);
+ bind(BREAK_LOOP);
+ // At least one byte in the last 32-byte vector is negative.
+ // Set up to look at the last 32 bytes as if they were a tail
+ lea(ary1, Address(ary1, len, Address::times_1));
+ addptr(result, len);
+ // Ignore the very last byte: if all others are positive,
+ // it must be negative, so we can skip right to the 2+1 byte
+ // end comparison at this point
+ orl(result, 31);
+ movl(len, 31);
// Fallthru to tail compare
} else if (UseSSE42Intrinsics) {
// With SSE4.2, use double quad vector compare
- Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+ Label COMPARE_WIDE_VECTORS, BREAK_LOOP;
// Compare 16-byte vectors
- andl(result, 0x0000000f); // tail count (in bytes)
- andl(len, 0xfffffff0); // vector count (in bytes)
- jcc(Assembler::zero, COMPARE_TAIL);
+ testl(len, 0xfffffff0); // vector count (in bytes)
+ jcc(Assembler::zero, TAIL_START);
+ andl(len, 0xfffffff0);
lea(ary1, Address(ary1, len, Address::times_1));
negptr(len);
@@ -3526,23 +3548,36 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
bind(COMPARE_WIDE_VECTORS);
movdqu(vec1, Address(ary1, len, Address::times_1));
ptest(vec1, vec2);
- jcc(Assembler::notZero, TRUE_LABEL);
+ jccb(Assembler::notZero, BREAK_LOOP);
addptr(len, 16);
- jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+ jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
- testl(result, result);
- jcc(Assembler::zero, FALSE_LABEL);
+ testl(result, 0x0000000f); // len is zero, any bytes remaining?
+ jcc(Assembler::zero, DONE);
- movdqu(vec1, Address(ary1, result, Address::times_1, -16));
+ // Quick test using the already prepared vector mask
+ movl(len, result);
+ andl(len, 0x0000000f); // tail count (in bytes)
+ movdqu(vec1, Address(ary1, len, Address::times_1, -16));
ptest(vec1, vec2);
- jccb(Assembler::notZero, TRUE_LABEL);
- jmpb(FALSE_LABEL);
+ jcc(Assembler::zero, DONE);
+ jmpb(TAIL_START);
- bind(COMPARE_TAIL); // len is zero
- movl(len, result);
+ bind(BREAK_LOOP);
+ // At least one byte in the last 16-byte vector is negative.
+ // Set up to look at the last 16 bytes as if they were a tail
+ lea(ary1, Address(ary1, len, Address::times_1));
+ addptr(result, len);
+ // Ignore the very last byte: if all others are positive,
+ // it must be negative, so we can skip right to the 2+1 byte
+ // end comparison at this point
+ orl(result, 15);
+ movl(len, 15);
// Fallthru to tail compare
}
}
+
+ bind(TAIL_START);
// Compare 4-byte vectors
andl(len, 0xfffffffc); // vector count (in bytes)
jccb(Assembler::zero, COMPARE_CHAR);
@@ -3553,34 +3588,45 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
bind(COMPARE_VECTORS);
movl(tmp1, Address(ary1, len, Address::times_1));
andl(tmp1, 0x80808080);
- jccb(Assembler::notZero, TRUE_LABEL);
+ jccb(Assembler::notZero, TAIL_ADJUST);
addptr(len, 4);
- jcc(Assembler::notZero, COMPARE_VECTORS);
+ jccb(Assembler::notZero, COMPARE_VECTORS);
- // Compare trailing char (final 2 bytes), if any
+ // Compare trailing char (final 2-3 bytes), if any
bind(COMPARE_CHAR);
+
testl(result, 0x2); // tail char
jccb(Assembler::zero, COMPARE_BYTE);
load_unsigned_short(tmp1, Address(ary1, 0));
andl(tmp1, 0x00008080);
- jccb(Assembler::notZero, TRUE_LABEL);
- subptr(result, 2);
+ jccb(Assembler::notZero, CHAR_ADJUST);
lea(ary1, Address(ary1, 2));
bind(COMPARE_BYTE);
testl(result, 0x1); // tail byte
- jccb(Assembler::zero, FALSE_LABEL);
+ jccb(Assembler::zero, DONE);
load_unsigned_byte(tmp1, Address(ary1, 0));
- andl(tmp1, 0x00000080);
- jccb(Assembler::notEqual, TRUE_LABEL);
- jmpb(FALSE_LABEL);
-
- bind(TRUE_LABEL);
- movl(result, 1); // return true
+ testl(tmp1, 0x00000080);
+ jccb(Assembler::zero, DONE);
+ subptr(result, 1);
jmpb(DONE);
- bind(FALSE_LABEL);
- xorl(result, result); // return false
+ bind(TAIL_ADJUST);
+ // There are negative bytes in the last 4-byte block.
+ // Adjust result and check the next three bytes
+ addptr(result, len);
+ orl(result, 3);
+ lea(ary1, Address(ary1, len, Address::times_1));
+ jmpb(COMPARE_CHAR);
+
+ bind(CHAR_ADJUST);
+ // We are looking at a char + optional byte tail, and found that one
+ // of the bytes in the char is negative. Adjust the result, check the
+ // first byte and readjust if needed.
+ andl(result, 0xfffffffc);
+ testl(tmp1, 0x00000080); // little-endian, so lowest byte comes first
+ jccb(Assembler::notZero, DONE);
+ addptr(result, 1);
// That's it
bind(DONE);
@@ -3590,6 +3636,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
vpxor(vec2, vec2);
}
}
+
// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
@@ -4014,41 +4061,18 @@ void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
}
/*
- * Algorithm for vector D2L and F2I conversions:-
- * a) Perform vector D2L/F2I cast.
- * b) Choose fast path if none of the result vector lane contains 0x80000000 value.
- * It signifies that source value could be any of the special floating point
- * values(NaN,-Inf,Inf,Max,-Min).
- * c) Set destination to zero if source is NaN value.
- * d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
+ * The following routine handles special floating point values (NaN/Inf/-Inf/Max/Min) for the casting operation.
+ * If src is NaN, the result is 0.
+ * If the src is negative infinity or any value less than or equal to the value of Integer.MIN_VALUE,
+ * the result is equal to the value of Integer.MIN_VALUE.
+ * If the src is positive infinity or any value greater than or equal to the value of Integer.MAX_VALUE,
+ * the result is equal to the value of Integer.MAX_VALUE.
*/
-
-void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
- KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
- Register scratch, int vec_enc) {
+void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
+ Register scratch, AddressLiteral float_sign_flip,
+ int vec_enc) {
Label done;
- evcvttpd2qq(dst, src, vec_enc);
- evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
- evpcmpeqq(ktmp1, xtmp1, dst, vec_enc);
- kortestwl(ktmp1, ktmp1);
- jccb(Assembler::equal, done);
-
- vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
- evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
- evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
-
- kxorwl(ktmp1, ktmp1, ktmp2);
- evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
- vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
- evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
- bind(done);
-}
-
-void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
- XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
- AddressLiteral float_sign_flip, Register scratch, int vec_enc) {
- Label done;
- vcvttps2dq(dst, src, vec_enc);
vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc);
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
vptest(xtmp2, xtmp2, vec_enc);
@@ -4073,11 +4097,11 @@ void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMM
bind(done);
}
-void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
- KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
- Register scratch, int vec_enc) {
+void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
+ Register scratch, AddressLiteral float_sign_flip,
+ int vec_enc) {
Label done;
- vcvttps2dq(dst, src, vec_enc);
evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch);
Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
kortestwl(ktmp1, ktmp1);
@@ -4094,6 +4118,115 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
bind(done);
}
+/*
+ * The following routine handles special floating point values (NaN/Inf/-Inf/Max/Min) for the casting operation.
+ * If src is NaN, the result is 0.
+ * If the src is negative infinity or any value less than or equal to the value of Long.MIN_VALUE,
+ * the result is equal to the value of Long.MIN_VALUE.
+ * If the src is positive infinity or any value greater than or equal to the value of Long.MAX_VALUE,
+ * the result is equal to the value of Long.MAX_VALUE.
+ */
+void C2_MacroAssembler::vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
+ Register scratch, AddressLiteral double_sign_flip,
+ int vec_enc) {
+ Label done;
+ evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
+ evpcmpeqq(ktmp1, xtmp1, dst, vec_enc);
+ kortestwl(ktmp1, ktmp1);
+ jccb(Assembler::equal, done);
+
+ vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
+ evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
+ evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
+
+ kxorwl(ktmp1, ktmp1, ktmp2);
+ evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
+ vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
+ evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
+ bind(done);
+}
+
+/*
+ * Algorithm for vector D2L and F2I conversions:-
+ * a) Perform vector D2L/F2I cast.
+ * b) Choose fast path if none of the result vector lane contains 0x80000000 value.
+ * It signifies that source value could be any of the special floating point
+ * values(NaN,-Inf,Inf,Max,-Min).
+ * c) Set destination to zero if source is NaN value.
+ * d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
+ */
+
+void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
+ Register scratch, int vec_enc) {
+ evcvttpd2qq(dst, src, vec_enc);
+ vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc);
+}
+
+void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
+ AddressLiteral float_sign_flip, Register scratch, int vec_enc) {
+ vcvttps2dq(dst, src, vec_enc);
+ vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc);
+}
+
+void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
+ Register scratch, int vec_enc) {
+ vcvttps2dq(dst, src, vec_enc);
+ vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
+}
+
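Per lane, the special-case fixup above restores the semantics described in the comment (NaN to 0, clamping at Integer.MIN_VALUE/MAX_VALUE) after the raw conversion has produced 0x80000000; a scalar C++ sketch of that contract, illustrative only and not the emitted code:

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Sketch of the per-lane float -> int cast contract.
    static int32_t cast_f2i_ref(float src) {
      if (std::isnan(src)) {
        return 0;                                      // NaN -> 0
      }
      if (src <= (float)std::numeric_limits<int32_t>::min()) {
        return std::numeric_limits<int32_t>::min();    // -Inf or too small -> MIN_VALUE
      }
      if (src >= (float)std::numeric_limits<int32_t>::max()) {
        return std::numeric_limits<int32_t>::max();    // +Inf or too large -> MAX_VALUE
      }
      return (int32_t)src;                             // in range: plain truncation
    }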
+#ifdef _LP64
+void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
+ AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
+ // Perform the floor(val+0.5) operation under MXCSR.RC rounding mode round-towards -inf,
+ // then restore the original MXCSR.RC mode afterwards.
+ ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
+ ldmxcsr(new_mxcsr, scratch);
+ mov64(scratch, julong_cast(0.5L));
+ evpbroadcastq(xtmp1, scratch, vec_enc);
+ vaddpd(xtmp1, src , xtmp1, vec_enc);
+ evcvtpd2qq(dst, xtmp1, vec_enc);
+ vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc);
+ ldmxcsr(mxcsr_std, scratch);
+}
+
+void C2_MacroAssembler::vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
+ AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
+ // Perform the floor(val+0.5) operation under MXCSR.RC rounding mode round-towards -inf,
+ // then restore the original MXCSR.RC mode afterwards.
+ ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
+ ldmxcsr(new_mxcsr, scratch);
+ movl(scratch, jint_cast(0.5));
+ movq(xtmp1, scratch);
+ vbroadcastss(xtmp1, xtmp1, vec_enc);
+ vaddps(xtmp1, src , xtmp1, vec_enc);
+ vcvtps2dq(dst, xtmp1, vec_enc);
+ vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
+ ldmxcsr(mxcsr_std, scratch);
+}
+
+void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip,
+ AddressLiteral new_mxcsr, Register scratch, int vec_enc) {
+ // Perform the floor(val+0.5) operation under MXCSR.RC rounding mode round-towards -inf,
+ // then restore the original MXCSR.RC mode afterwards.
+ ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
+ ldmxcsr(new_mxcsr, scratch);
+ movl(scratch, jint_cast(0.5));
+ movq(xtmp1, scratch);
+ vbroadcastss(xtmp1, xtmp1, vec_enc);
+ vaddps(xtmp1, src , xtmp1, vec_enc);
+ vcvtps2dq(dst, xtmp1, vec_enc);
+ vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc);
+ ldmxcsr(mxcsr_std, scratch);
+}
+#endif
+
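The vector_round_* helpers above add 0.5 and convert under a temporarily installed round-toward-negative-infinity MXCSR.RC mode, i.e. floor(val + 0.5) per lane, then apply the same special-case fixup. A scalar approximation of that per-lane behaviour (the scalar std::floor stands in for the MXCSR-controlled conversion; illustrative only):

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Sketch of the per-lane float -> int rounding contract (not the emitted code).
    static int32_t round_f2i_ref(float src) {
      if (std::isnan(src)) {
        return 0;                                      // NaN -> 0
      }
      if (src <= (float)std::numeric_limits<int32_t>::min()) {
        return std::numeric_limits<int32_t>::min();    // -Inf or too small -> MIN_VALUE
      }
      if (src >= (float)std::numeric_limits<int32_t>::max()) {
        return std::numeric_limits<int32_t>::max();    // +Inf or too large -> MAX_VALUE
      }
      return (int32_t)std::floor(src + 0.5f);          // floor(val + 0.5)
    }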
void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt) {
switch (from_elem_bt) {
@@ -4321,6 +4454,94 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in
}
}
+
+//
+// Following is the lookup-table based popcount computation algorithm:
+// Index Bit set count
+// [ 0000 -> 0,
+// 0001 -> 1,
+// 0010 -> 1,
+// 0011 -> 2,
+// 0100 -> 1,
+// 0101 -> 2,
+// 0110 -> 2,
+// 0111 -> 3,
+// 1000 -> 1,
+// 1001 -> 2,
+// 1010 -> 2,
+// 1011 -> 3,
+// 1100 -> 2,
+// 1101 -> 3,
+// 1110 -> 3,
+// 1111 -> 4 ]
+// a. Count the number of 1s in 4 LSB bits of each byte. These bits are used as
+// shuffle indices for lookup table access.
+// b. Right shift each byte of vector lane by 4 positions.
+// c. Count the number of 1s in 4 MSB bits of each byte. These bits are used as
+// shuffle indices for lookup table access.
+// d. Add the bitset count of upper and lower 4 bits of each byte.
+// e. Unpack double words to quad words and compute sum of absolute difference of bitset
+// count of all the bytes of a quadword.
+// f. Perform step e. for upper 128bit vector lane.
+// g. Pack the bitset count of quadwords back to double word.
+// h. Unpacking and packing operations are not needed for 64bit vector lane.
+void C2_MacroAssembler::vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
+ int vec_enc) {
+ if (VM_Version::supports_avx512_vpopcntdq()) {
+ vpopcntd(dst, src, vec_enc);
+ } else {
+ assert((vec_enc == Assembler::AVX_512bit && VM_Version::supports_avx512bw()) || VM_Version::supports_avx2(), "");
+ movl(rtmp, 0x0F0F0F0F);
+ movdl(xtmp1, rtmp);
+ vpbroadcastd(xtmp1, xtmp1, vec_enc);
+ if (Assembler::AVX_512bit == vec_enc) {
+ evmovdqul(xtmp2, k0, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), false, vec_enc, rtmp);
+ } else {
+ vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), rtmp);
+ }
+ vpand(xtmp3, src, xtmp1, vec_enc);
+ vpshufb(xtmp3, xtmp2, xtmp3, vec_enc);
+ vpsrlw(dst, src, 4, vec_enc);
+ vpand(dst, dst, xtmp1, vec_enc);
+ vpshufb(dst, xtmp2, dst, vec_enc);
+ vpaddb(xtmp3, dst, xtmp3, vec_enc);
+ vpxor(xtmp1, xtmp1, xtmp1, vec_enc);
+ vpunpckhdq(dst, xtmp3, xtmp1, vec_enc);
+ vpsadbw(dst, dst, xtmp1, vec_enc);
+ vpunpckldq(xtmp2, xtmp3, xtmp1, vec_enc);
+ vpsadbw(xtmp2, xtmp2, xtmp1, vec_enc);
+ vpackuswb(dst, xtmp2, dst, vec_enc);
+ }
+}
+
+void C2_MacroAssembler::vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
+ int vec_enc) {
+ if (VM_Version::supports_avx512_vpopcntdq()) {
+ vpopcntq(dst, src, vec_enc);
+ } else if (vec_enc == Assembler::AVX_512bit) {
+ assert(VM_Version::supports_avx512bw(), "");
+ movl(rtmp, 0x0F0F0F0F);
+ movdl(xtmp1, rtmp);
+ vpbroadcastd(xtmp1, xtmp1, vec_enc);
+ evmovdqul(xtmp2, k0, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), true, vec_enc, rtmp);
+ vpandq(xtmp3, src, xtmp1, vec_enc);
+ vpshufb(xtmp3, xtmp2, xtmp3, vec_enc);
+ vpsrlw(dst, src, 4, vec_enc);
+ vpandq(dst, dst, xtmp1, vec_enc);
+ vpshufb(dst, xtmp2, dst, vec_enc);
+ vpaddb(xtmp3, dst, xtmp3, vec_enc);
+ vpxorq(xtmp1, xtmp1, xtmp1, vec_enc);
+ vpsadbw(dst, xtmp3, xtmp1, vec_enc);
+ } else {
+ // We do not see any performance benefit from running the
+ // above instruction sequence on a 256-bit vector, which
+ // can hold at most 4 long elements.
+ ShouldNotReachHere();
+ }
+ evpmovqd(dst, dst, vec_enc);
+}
+
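A scalar C++ model of the nibble lookup-table scheme the two routines above vectorize: steps a-d correspond to the two table lookups and the byte add below, while the vpsadbw/unpack steps sum those per-byte counts into each int or long lane (sketch with an illustrative table name, not the emitted code):

    #include <cstdint>

    // Popcount of each 4-bit index, i.e. the nibble values listed in the table above.
    static const uint8_t NIBBLE_POPCOUNT[16] = {
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
    };

    // What vpshufb computes for every byte of the vector in parallel.
    static inline uint8_t popcount_byte_ref(uint8_t b) {
      return NIBBLE_POPCOUNT[b & 0x0F]          // step a: low nibble as table index
           + NIBBLE_POPCOUNT[(b >> 4) & 0x0F];  // steps b+c: high nibble after the shift
    }

    // Per-int result, mirroring the byte sums that vpsadbw accumulates per lane.
    static inline uint32_t popcount_int_ref(uint32_t x) {
      uint32_t sum = 0;
      for (int i = 0; i < 4; i++) {
        sum += popcount_byte_ref((uint8_t)(x >> (8 * i)));   // steps d/e: add the byte counts
      }
      return sum;
    }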
#ifndef _LP64
void C2_MacroAssembler::vector_maskall_operation32(KRegister dst, Register src, KRegister tmp, int mask_len) {
assert(VM_Version::supports_avx512bw(), "");
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
index 0e6a381430f2ae087e7a3a4eaa3c592b0a248fd5..5f8e38a93b2395921f2ed094bdd248af7cb6245c 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -271,11 +271,10 @@ public:
XMMRegister vec1, int ae, KRegister mask = knoreg);
// Search for Non-ASCII character (Negative byte value) in a byte array,
- // return true if it has any and false otherwise.
- void has_negatives(Register ary1, Register len,
- Register result, Register tmp1,
- XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
-
+ // return index of the first such character, otherwise len.
+ void count_positives(Register ary1, Register len,
+ Register result, Register tmp1,
+ XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
@@ -304,6 +303,7 @@ public:
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
Register scratch, int vec_enc);
+
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Register scratch, int vec_enc);
@@ -311,10 +311,45 @@ public:
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt);
+ void vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral double_sign_flip,
+ int vec_enc);
+
+ void vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip,
+ int vec_enc);
+
+ void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
+ Register scratch, AddressLiteral float_sign_flip,
+ int vec_enc);
+
+#ifdef _LP64
+ void vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
+ AddressLiteral new_mxcsr, Register scratch, int vec_enc);
+
+ void vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
+ AddressLiteral new_mxcsr, Register scratch, int vec_enc);
+
+ void vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
+ XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip,
+ AddressLiteral new_mxcsr, Register scratch, int vec_enc);
+#endif
+
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
bool merge, BasicType bt, int vlen_enc);
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
bool merge, BasicType bt, int vlen_enc);
+ void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
+ int vec_enc);
+
+ void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
+ XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
+ int vec_enc);
+
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/frame_x86.inline.hpp b/src/hotspot/cpu/x86/frame_x86.inline.hpp
index 733a357d5fe3e7fe9a9e445b64b1ebd4b7c381ea..23072238e16aa3caf798e6f0690f3e5f4ca9ccaf 100644
--- a/src/hotspot/cpu/x86/frame_x86.inline.hpp
+++ b/src/hotspot/cpu/x86/frame_x86.inline.hpp
@@ -138,10 +138,13 @@ inline intptr_t* frame::id(void) const { return unextended_sp(); }
inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
return this->id() > id ; }
-
-
inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }
+inline intptr_t* frame::link_or_null() const {
+ intptr_t** ptr = (intptr_t **)addr_at(link_offset);
+ return os::is_readable_pointer(ptr) ? *ptr : NULL;
+}
+
inline intptr_t* frame::unextended_sp() const { return _unextended_sp; }
// Return address:
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
index 6525b13c5c253e54b3b2a4bce288a506ed582304..475a92d0f43a5264b37765917bb07f7c764f8c1e 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
@@ -67,7 +67,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
__ jcc(Assembler::equal, filtered);
- __ pusha(); // push registers
+ __ push_call_clobbered_registers(false /* save_fpu */);
#ifdef _LP64
if (count == c_rarg0) {
if (addr == c_rarg1) {
@@ -90,7 +90,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry),
addr, count);
#endif
- __ popa();
+ __ pop_call_clobbered_registers(false /* save_fpu */);
__ bind(filtered);
}
@@ -98,7 +98,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
- __ pusha(); // push registers (overkill)
+ __ push_call_clobbered_registers(false /* save_fpu */);
#ifdef _LP64
if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
assert_different_registers(c_rarg1, addr);
@@ -114,7 +114,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry),
addr, count);
#endif
- __ popa();
+ __ pop_call_clobbered_registers(false /* save_fpu */);
}
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -204,14 +204,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ jmp(done);
__ bind(runtime);
- // save the live input values
- if(tosca_live) __ push(rax);
- if (obj != noreg && obj != rax)
- __ push(obj);
+ // Determine and save the live input values
+ RegSet saved;
+ if (tosca_live) saved += RegSet::of(rax);
+ if (obj != noreg && obj != rax) saved += RegSet::of(obj);
+ if (pre_val != rax) saved += RegSet::of(pre_val);
+ NOT_LP64( saved += RegSet::of(thread); )
- if (pre_val != rax)
- __ push(pre_val);
+ __ push_set(saved);
// Calling the runtime using the regular call_VM_leaf mechanism generates
// code (generated by InterpreterMacroAssember::call_VM_leaf_base)
@@ -225,8 +226,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
// So when we do not have have a full interpreter frame on the stack
// expand_call should be passed true.
- NOT_LP64( __ push(thread); )
-
if (expand_call) {
LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
@@ -244,17 +243,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
} else {
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
}
-
- NOT_LP64( __ pop(thread); )
-
- // save the live input values
- if (pre_val != rax)
- __ pop(pre_val);
-
- if (obj != noreg && obj != rax)
- __ pop(obj);
-
- if(tosca_live) __ pop(rax);
+ __ pop_set(saved);
__ bind(done);
}
@@ -328,21 +317,16 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
__ bind(runtime);
// save the live input values
- __ push(store_addr);
-#ifdef _LP64
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, r15_thread);
-#else
- __ push(thread);
+ RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread));
+ __ push_set(saved);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop(thread);
-#endif
- __ pop(store_addr);
+ __ pop_set(saved);
__ bind(done);
}
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2) {
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
bool in_heap = (decorators & IN_HEAP) != 0;
bool as_normal = (decorators & AS_NORMAL) != 0;
assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported");
@@ -350,7 +334,6 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
bool needs_pre_barrier = as_normal;
bool needs_post_barrier = val != noreg && in_heap;
- Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
// flatten object address if needed
// We do it regardless of precise because we need the registers
@@ -379,7 +362,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
false /* expand_call */);
}
if (val == noreg) {
- BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
} else {
Register new_val = val;
if (needs_post_barrier) {
@@ -389,7 +372,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
__ movptr(new_val, val);
}
}
- BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
if (needs_post_barrier) {
g1_write_barrier_post(masm /*masm*/,
tmp1 /* store_adr */,
@@ -496,13 +479,13 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ bind(runtime);
- __ save_live_registers_no_oop_map(true);
+ __ push_call_clobbered_registers();
// load the pre-value
__ load_parameter(0, rcx);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), rcx, thread);
- __ restore_live_registers(true);
+ __ pop_call_clobbered_registers();
__ bind(done);
@@ -515,9 +498,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
__ prologue("g1_post_barrier", false);
- // arg0: store_address
- Address store_addr(rbp, 2*BytesPerWord);
-
CardTableBarrierSet* ct =
barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
@@ -573,12 +553,11 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
__ jmp(enqueued);
__ bind(runtime);
-
- __ save_live_registers_no_oop_map(true);
+ __ push_call_clobbered_registers();
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ restore_live_registers(true);
+ __ pop_call_clobbered_registers();
__ bind(enqueued);
__ pop(rdx);
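Note: the RegSet-based spill in the pre-barrier slow path above is a compact restatement of the old explicit push/pop pairs. As a usage sketch (using only the push_set/pop_set helpers added to MacroAssembler later in this patch), the generated sequence amounts to:

  RegSet saved;                                                  // start empty
  if (tosca_live)                 saved += RegSet::of(rax);      // cached TOS value
  if (obj != noreg && obj != rax) saved += RegSet::of(obj);      // store destination
  if (pre_val != rax)             saved += RegSet::of(pre_val);  // previous field value
  NOT_LP64( saved += RegSet::of(thread); )                       // 32-bit only: thread register
  __ push_set(saved);   // one aligned spill block instead of individual pushes
  // ... call_VM_leaf(G1BarrierSetRuntime::write_ref_field_pre_entry, pre_val, thread) ...
  __ pop_set(saved);    // registers are restored in the reverse order of the spill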
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
index 94bbadc7b2b14622e7a168a6641d6615200bfe2f..a5695f5657a4ad6a10ed8fc1687959f6b55f2ecb 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
@@ -54,7 +54,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
Register tmp2);
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2);
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
public:
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp
index 55823bdf217c33b059b7066d38e310fd61056d2d..930926bbb17652308db427ae09242dba1db94451 100644
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp
@@ -103,7 +103,7 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators,
}
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2) {
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
bool in_heap = (decorators & IN_HEAP) != 0;
bool in_native = (decorators & IN_NATIVE) != 0;
bool is_not_null = (decorators & IS_NOT_NULL) != 0;
diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp
index 3c63c00e4dbcb8b4fe1fa1c5e34b84684d9691e7..085238d60b55f2caa4dde806b5409cd5864d8a35 100644
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp
@@ -47,7 +47,7 @@ public:
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp_thread);
virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2);
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
// Support for jniFastGetField to try resolving a jobject/jweak in native
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
diff --git a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp
index 7fc36ffae8f0ba32a025bbf2cf81aa71eb85c378..f314cac5980b7f3c9e44ad888383a1139ab1c58d 100644
--- a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp
@@ -128,7 +128,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob
}
void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2) {
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
bool in_heap = (decorators & IN_HEAP) != 0;
bool is_array = (decorators & IS_ARRAY) != 0;
@@ -137,7 +137,7 @@ void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorS
bool needs_post_barrier = val != noreg && in_heap;
- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg);
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg);
if (needs_post_barrier) {
// flatten object address if needed
if (!precise || (dst.index() == noreg && dst.disp() == 0)) {
diff --git a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp
index a65286bd5996734f49e47f0f2137f27676d3c2f6..4760b222977a81b9e8febd93701786409860835d 100644
--- a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp
@@ -35,7 +35,7 @@ protected:
virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp);
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2);
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
};
#endif // CPU_X86_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp
index 9325ab7ecf9c711605f1fe75d637782d2fecdcca..618095bdfa634b8c3a7cdccab5e280ae96668c1b 100644
--- a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp
@@ -84,10 +84,10 @@ void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Decorat
}
void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2) {
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
if (is_reference_type(type)) {
- oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
} else {
- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
}
}
diff --git a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.hpp
index 39950225bfe736a71f7b6dc34a021e78357b110e..c8b5043256ad203bed7d16f7ead5e188c91cbb45 100644
--- a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.hpp
@@ -39,7 +39,7 @@ protected:
virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {}
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2) = 0;
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0;
public:
virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register src, Register dst, Register count);
@@ -47,7 +47,7 @@ public:
Register src, Register dst, Register count);
virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2);
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
};
#endif // CPU_X86_GC_SHARED_MODREFBARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index 64169b015293084fc4a9ec692c5d88977d38af6a..d213e6fda394e6796bddf81a2a22aef405026b66 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -591,7 +591,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
}
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2) {
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
bool on_oop = is_reference_type(type);
bool in_heap = (decorators & IN_HEAP) != 0;
@@ -599,7 +599,6 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet
if (on_oop && in_heap) {
bool needs_pre_barrier = as_normal;
- Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
// flatten object address if needed
// We do it regardless of precise because we need the registers
@@ -629,14 +628,14 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet
false /* expand_call */);
}
if (val == noreg) {
- BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
} else {
iu_barrier(masm, val, tmp3);
- BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
}
NOT_LP64(imasm->restore_bcp());
} else {
- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
}
}
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
index 2a8c0862b9e6380c85c18570693e8e3d483fc655..47dfe1449280259f524318b1adf4fc4b573787c8 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
@@ -77,7 +77,7 @@ public:
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp_thread);
virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
- Address dst, Register val, Register tmp1, Register tmp2);
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
};
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
index 8c74db952e8a7e611f86a0de89615449dd96d5ce..00071d66da34166365cd8e10f56d832988295377 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
@@ -193,7 +193,8 @@ void ZBarrierSetAssembler::store_at(MacroAssembler* masm,
Address dst,
Register src,
Register tmp1,
- Register tmp2) {
+ Register tmp2,
+ Register tmp3) {
BLOCK_COMMENT("ZBarrierSetAssembler::store_at {");
// Verify oop store
@@ -211,7 +212,7 @@ void ZBarrierSetAssembler::store_at(MacroAssembler* masm,
}
// Store value
- BarrierSetAssembler::store_at(masm, decorators, type, dst, src, tmp1, tmp2);
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, src, tmp1, tmp2, tmp3);
BLOCK_COMMENT("} ZBarrierSetAssembler::store_at");
}
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
index 134f7e6c9e2e5951a63a8a816c3d1ddd311332df..2446bd1e46a73357d47f9fda7a95828d9a13df8a 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
@@ -61,7 +61,8 @@ public:
Address dst,
Register src,
Register tmp1,
- Register tmp2);
+ Register tmp2,
+ Register tmp3);
#endif // ASSERT
virtual void arraycopy_prologue(MacroAssembler* masm,
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp
index bf8b94a6319dbacfe7a14d0228fb3fb50689a3f8..34d4178b8da4c39e4412428ddb1cb222098532ce 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp
@@ -1972,19 +1972,18 @@ void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) {
#endif
}
-// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
-void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
- int increment, Address mask,
- Register scratch, bool preloaded,
- Condition cond, Label* where) {
- if (!preloaded) {
- movl(scratch, counter_addr);
- }
- incrementl(scratch, increment);
+// Jump if ((*counter_addr += increment) & mask) == 0
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, Address mask,
+ Register scratch, Label* where) {
+ // This update is actually not atomic and can lose a number of updates
+ // under heavy contention, but the alternative of using the (contended)
+ // atomic update here penalizes profiling paths too much.
+ movl(scratch, counter_addr);
+ incrementl(scratch, InvocationCounter::count_increment);
movl(counter_addr, scratch);
andl(scratch, mask);
if (where != NULL) {
- jcc(cond, *where);
+ jcc(Assembler::zero, *where);
}
}
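Note: a standalone sketch of the logic increment_mask_and_jump now emits (names are illustrative, not part of the patch); it returns true when the caller should branch to the overflow label:

  #include <cstdint>

  bool increment_and_check(int32_t* counter_addr, const int32_t* mask_addr, int32_t increment) {
    int32_t counter = *counter_addr;     // movl(scratch, counter_addr)
    counter += increment;                // incrementl(scratch, InvocationCounter::count_increment)
    *counter_addr = counter;             // movl(counter_addr, scratch) -- deliberately not atomic
    return (counter & *mask_addr) == 0;  // andl(scratch, mask); jcc(Assembler::zero, *where)
  }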
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp
index 0aecb6b4a25e6bf47d3876060c7940e7bb276003..a94f35426b8bcaa186f7e3b54c8c8314fc4e59d5 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.hpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp
@@ -248,10 +248,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
bool decrement = false);
void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
bool decrement = false);
- void increment_mask_and_jump(Address counter_addr,
- int increment, Address mask,
- Register scratch, bool preloaded,
- Condition cond, Label* where);
+ void increment_mask_and_jump(Address counter_addr, Address mask,
+ Register scratch, Label* where);
void set_mdp_flag_at(Register mdp_in, int flag_constant);
void test_mdp_data_at(Register mdp_in, int offset, Register value,
Register test_value_out,
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index 10a1cb4b6a1a0c094566558413e86fbdc0a9beff..855c855089d2f9531e14b5313111bcb6e52a7aa6 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -332,21 +332,6 @@ void MacroAssembler::movptr(Address dst, intptr_t src) {
movl(dst, src);
}
-
-void MacroAssembler::pop_callee_saved_registers() {
- pop(rcx);
- pop(rdx);
- pop(rdi);
- pop(rsi);
-}
-
-void MacroAssembler::push_callee_saved_registers() {
- push(rsi);
- push(rdi);
- push(rdx);
- push(rcx);
-}
-
void MacroAssembler::pushoop(jobject obj) {
push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}
@@ -2267,12 +2252,12 @@ void MacroAssembler::fld_x(AddressLiteral src) {
Assembler::fld_x(as_Address(src));
}
-void MacroAssembler::ldmxcsr(AddressLiteral src) {
+void MacroAssembler::ldmxcsr(AddressLiteral src, Register scratchReg) {
if (reachable(src)) {
Assembler::ldmxcsr(as_Address(src));
} else {
- lea(rscratch1, src);
- Assembler::ldmxcsr(Address(rscratch1, 0));
+ lea(scratchReg, src);
+ Assembler::ldmxcsr(Address(scratchReg, 0));
}
}
@@ -3593,6 +3578,191 @@ void MacroAssembler::tlab_allocate(Register thread, Register obj,
bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
}
+RegSet MacroAssembler::call_clobbered_gp_registers() {
+ RegSet regs;
+#ifdef _LP64
+ regs += RegSet::of(rax, rcx, rdx);
+#ifndef _WINDOWS
+ regs += RegSet::of(rsi, rdi);
+#endif
+ regs += RegSet::range(r8, r11);
+#else
+ regs += RegSet::of(rax, rcx, rdx);
+#endif
+ return regs;
+}
+
+XMMRegSet MacroAssembler::call_clobbered_xmm_registers() {
+ int num_xmm_registers = XMMRegisterImpl::available_xmm_registers();
+#if defined(_WINDOWS) && defined(_LP64)
+ XMMRegSet result = XMMRegSet::range(xmm0, xmm5);
+ if (num_xmm_registers > 16) {
+ result += XMMRegSet::range(xmm16, as_XMMRegister(num_xmm_registers - 1));
+ }
+ return result;
+#else
+ return XMMRegSet::range(xmm0, as_XMMRegister(num_xmm_registers - 1));
+#endif
+}
+
+static int FPUSaveAreaSize = align_up(108, StackAlignmentInBytes); // 108 bytes needed for FPU state by fsave/frstor
+
+#ifndef _LP64
+static bool use_x87_registers() { return UseSSE < 2; }
+#endif
+static bool use_xmm_registers() { return UseSSE >= 1; }
+
+// C1 only ever uses the first double/float of the XMM register.
+static int xmm_save_size() { return UseSSE >= 2 ? sizeof(double) : sizeof(float); }
+
+static void save_xmm_register(MacroAssembler* masm, int offset, XMMRegister reg) {
+ if (UseSSE == 1) {
+ masm->movflt(Address(rsp, offset), reg);
+ } else {
+ masm->movdbl(Address(rsp, offset), reg);
+ }
+}
+
+static void restore_xmm_register(MacroAssembler* masm, int offset, XMMRegister reg) {
+ if (UseSSE == 1) {
+ masm->movflt(reg, Address(rsp, offset));
+ } else {
+ masm->movdbl(reg, Address(rsp, offset));
+ }
+}
+
+int register_section_sizes(RegSet gp_registers, XMMRegSet xmm_registers, bool save_fpu,
+ int& gp_area_size, int& fp_area_size, int& xmm_area_size) {
+
+ gp_area_size = align_up(gp_registers.size() * RegisterImpl::max_slots_per_register * VMRegImpl::stack_slot_size,
+ StackAlignmentInBytes);
+#ifdef _LP64
+ fp_area_size = 0;
+#else
+ fp_area_size = (save_fpu && use_x87_registers()) ? FPUSaveAreaSize : 0;
+#endif
+ xmm_area_size = (save_fpu && use_xmm_registers()) ? xmm_registers.size() * xmm_save_size() : 0;
+
+ return gp_area_size + fp_area_size + xmm_area_size;
+}
+
+void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude, bool save_fpu) {
+ block_comment("push_call_clobbered_registers start");
+ // Regular registers
+ RegSet gp_registers_to_push = call_clobbered_gp_registers() - exclude;
+
+ int gp_area_size;
+ int fp_area_size;
+ int xmm_area_size;
+ int total_save_size = register_section_sizes(gp_registers_to_push, call_clobbered_xmm_registers(), save_fpu,
+ gp_area_size, fp_area_size, xmm_area_size);
+ subptr(rsp, total_save_size);
+
+ push_set(gp_registers_to_push, 0);
+
+#ifndef _LP64
+ if (save_fpu && use_x87_registers()) {
+ fnsave(Address(rsp, gp_area_size));
+ fwait();
+ }
+#endif
+ if (save_fpu && use_xmm_registers()) {
+ push_set(call_clobbered_xmm_registers(), gp_area_size + fp_area_size);
+ }
+
+ block_comment("push_call_clobbered_registers end");
+}
+
+void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude, bool restore_fpu) {
+ block_comment("pop_call_clobbered_registers start");
+
+ RegSet gp_registers_to_pop = call_clobbered_gp_registers() - exclude;
+
+ int gp_area_size;
+ int fp_area_size;
+ int xmm_area_size;
+ int total_save_size = register_section_sizes(gp_registers_to_pop, call_clobbered_xmm_registers(), restore_fpu,
+ gp_area_size, fp_area_size, xmm_area_size);
+
+ if (restore_fpu && use_xmm_registers()) {
+ pop_set(call_clobbered_xmm_registers(), gp_area_size + fp_area_size);
+ }
+#ifndef _LP64
+ if (restore_fpu && use_x87_registers()) {
+ frstor(Address(rsp, gp_area_size));
+ }
+#endif
+
+ pop_set(gp_registers_to_pop, 0);
+
+ addptr(rsp, total_save_size);
+
+ vzeroupper();
+
+ block_comment("pop_call_clobbered_registers end");
+}
+
+void MacroAssembler::push_set(XMMRegSet set, int offset) {
+ assert(is_aligned(set.size() * xmm_save_size(), StackAlignmentInBytes), "must be");
+ int spill_offset = offset;
+
+  for (RegSetIterator<XMMRegister> it = set.begin(); *it != xnoreg; ++it) {
+ save_xmm_register(this, spill_offset, *it);
+ spill_offset += xmm_save_size();
+ }
+}
+
+void MacroAssembler::pop_set(XMMRegSet set, int offset) {
+ int restore_size = set.size() * xmm_save_size();
+ assert(is_aligned(restore_size, StackAlignmentInBytes), "must be");
+
+ int restore_offset = offset + restore_size - xmm_save_size();
+
+  for (ReverseRegSetIterator<XMMRegister> it = set.rbegin(); *it != xnoreg; ++it) {
+ restore_xmm_register(this, restore_offset, *it);
+ restore_offset -= xmm_save_size();
+ }
+}
+
+void MacroAssembler::push_set(RegSet set, int offset) {
+ int spill_offset;
+ if (offset == -1) {
+ int register_push_size = set.size() * RegisterImpl::max_slots_per_register * VMRegImpl::stack_slot_size;
+ int aligned_size = align_up(register_push_size, StackAlignmentInBytes);
+ subptr(rsp, aligned_size);
+ spill_offset = 0;
+ } else {
+ spill_offset = offset;
+ }
+
+  for (RegSetIterator<Register> it = set.begin(); *it != noreg; ++it) {
+ movptr(Address(rsp, spill_offset), *it);
+ spill_offset += RegisterImpl::max_slots_per_register * VMRegImpl::stack_slot_size;
+ }
+}
+
+void MacroAssembler::pop_set(RegSet set, int offset) {
+
+ int gp_reg_size = RegisterImpl::max_slots_per_register * VMRegImpl::stack_slot_size;
+ int restore_size = set.size() * gp_reg_size;
+ int aligned_size = align_up(restore_size, StackAlignmentInBytes);
+
+ int restore_offset;
+ if (offset == -1) {
+ restore_offset = restore_size - gp_reg_size;
+ } else {
+ restore_offset = offset + restore_size - gp_reg_size;
+ }
+  for (ReverseRegSetIterator<Register> it = set.rbegin(); *it != noreg; ++it) {
+ movptr(*it, Address(rsp, restore_offset));
+ restore_offset -= gp_reg_size;
+ }
+
+ if (offset == -1) {
+ addptr(rsp, aligned_size);
+ }
+}
+
// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register thread, Register obj,
Register var_size_in_bytes,
@@ -4605,14 +4775,14 @@ void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Reg
}
void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
- Register tmp1, Register tmp2) {
+ Register tmp1, Register tmp2, Register tmp3) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
decorators = AccessInternal::decorator_fixup(decorators);
bool as_raw = (decorators & AS_RAW) != 0;
if (as_raw) {
- bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2);
+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
} else {
- bs->store_at(this, decorators, type, dst, src, tmp1, tmp2);
+ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
}
}
@@ -4628,13 +4798,13 @@ void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register
}
void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
- Register tmp2, DecoratorSet decorators) {
- access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
+ Register tmp2, Register tmp3, DecoratorSet decorators) {
+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
}
// Used for storing NULLs.
void MacroAssembler::store_heap_oop_null(Address dst) {
- access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
}
#ifdef _LP64
@@ -8950,6 +9120,80 @@ void MacroAssembler::convert_f2l(Register dst, XMMRegister src) {
bind(done);
}
+void MacroAssembler::round_float(Register dst, XMMRegister src, Register rtmp, Register rcx) {
+  // The following code is a line-by-line assembly translation of the rounding
+  // algorithm. Please refer to java.lang.Math.round(float) for details.
+ const int32_t FloatConsts_EXP_BIT_MASK = 0x7F800000;
+ const int32_t FloatConsts_SIGNIFICAND_WIDTH = 24;
+ const int32_t FloatConsts_EXP_BIAS = 127;
+ const int32_t FloatConsts_SIGNIF_BIT_MASK = 0x007FFFFF;
+ const int32_t MINUS_32 = 0xFFFFFFE0;
+ Label L_special_case, L_block1, L_exit;
+ movl(rtmp, FloatConsts_EXP_BIT_MASK);
+ movdl(dst, src);
+ andl(dst, rtmp);
+ sarl(dst, FloatConsts_SIGNIFICAND_WIDTH - 1);
+ movl(rtmp, FloatConsts_SIGNIFICAND_WIDTH - 2 + FloatConsts_EXP_BIAS);
+ subl(rtmp, dst);
+ movl(rcx, rtmp);
+ movl(dst, MINUS_32);
+ testl(rtmp, dst);
+ jccb(Assembler::notEqual, L_special_case);
+ movdl(dst, src);
+ andl(dst, FloatConsts_SIGNIF_BIT_MASK);
+ orl(dst, FloatConsts_SIGNIF_BIT_MASK + 1);
+ movdl(rtmp, src);
+ testl(rtmp, rtmp);
+ jccb(Assembler::greaterEqual, L_block1);
+ negl(dst);
+ bind(L_block1);
+ sarl(dst);
+ addl(dst, 0x1);
+ sarl(dst, 0x1);
+ jmp(L_exit);
+ bind(L_special_case);
+ convert_f2i(dst, src);
+ bind(L_exit);
+}
+
+void MacroAssembler::round_double(Register dst, XMMRegister src, Register rtmp, Register rcx) {
+  // The following code is a line-by-line assembly translation of the rounding
+  // algorithm. Please refer to java.lang.Math.round(double) for details.
+ const int64_t DoubleConsts_EXP_BIT_MASK = 0x7FF0000000000000L;
+ const int64_t DoubleConsts_SIGNIFICAND_WIDTH = 53;
+ const int64_t DoubleConsts_EXP_BIAS = 1023;
+ const int64_t DoubleConsts_SIGNIF_BIT_MASK = 0x000FFFFFFFFFFFFFL;
+ const int64_t MINUS_64 = 0xFFFFFFFFFFFFFFC0L;
+ Label L_special_case, L_block1, L_exit;
+ mov64(rtmp, DoubleConsts_EXP_BIT_MASK);
+ movq(dst, src);
+ andq(dst, rtmp);
+ sarq(dst, DoubleConsts_SIGNIFICAND_WIDTH - 1);
+ mov64(rtmp, DoubleConsts_SIGNIFICAND_WIDTH - 2 + DoubleConsts_EXP_BIAS);
+ subq(rtmp, dst);
+ movq(rcx, rtmp);
+ mov64(dst, MINUS_64);
+ testq(rtmp, dst);
+ jccb(Assembler::notEqual, L_special_case);
+ movq(dst, src);
+ mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK);
+ andq(dst, rtmp);
+ mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK + 1);
+ orq(dst, rtmp);
+ movq(rtmp, src);
+ testq(rtmp, rtmp);
+ jccb(Assembler::greaterEqual, L_block1);
+ negq(dst);
+ bind(L_block1);
+ sarq(dst);
+ addq(dst, 0x1);
+ sarq(dst, 0x1);
+ jmp(L_exit);
+ bind(L_special_case);
+ convert_d2l(dst, src);
+ bind(L_exit);
+}
+
void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
Label done;
cvttsd2siq(dst, src);
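Note: round_float/round_double above are instruction-for-instruction translations of java.lang.Math.round. A scalar C++ rendering of the float variant (a sketch only; it assumes arithmetic right shift of negative values, as on all supported platforms, and leaves NaN/overflow handling to the convert_f2i special case):

  #include <cstdint>
  #include <cstring>

  int32_t math_round_float(float a) {
    int32_t bits;
    std::memcpy(&bits, &a, sizeof(bits));            // movdl(dst, src)
    int32_t biased_exp = (bits & 0x7F800000) >> 23;  // EXP_BIT_MASK >> (SIGNIFICAND_WIDTH - 1)
    int32_t shift = (24 - 2 + 127) - biased_exp;     // SIGNIFICAND_WIDTH - 2 + EXP_BIAS - exponent
    if ((shift & -32) == 0) {                        // testl(rtmp, MINUS_32): 0 <= shift < 32
      int32_t r = (bits & 0x007FFFFF) | 0x00800000;  // significand plus the implicit leading one
      if (bits < 0) {
        r = -r;                                      // negl(dst)
      }
      return ((r >> shift) + 1) >> 1;                // sarl(dst); addl(dst, 1); sarl(dst, 1)
    }
    return (int32_t)a;                               // L_special_case: convert_f2i(dst, src)
  }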
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 3593874866ca81157b1fa49f975034d866383443..303b6b0c83c1e8e2a8e9cd698ef7431909cea51d 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -26,6 +26,7 @@
#define CPU_X86_MACROASSEMBLER_X86_HPP
#include "asm/assembler.hpp"
+#include "asm/register.hpp"
#include "code/vmreg.inline.hpp"
#include "compiler/oopMap.hpp"
#include "utilities/macros.hpp"
@@ -345,14 +346,14 @@ class MacroAssembler: public Assembler {
void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
Register tmp1, Register thread_tmp);
void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
- Register tmp1, Register tmp2);
+ Register tmp1, Register tmp2, Register tmp3);
void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
Register thread_tmp = noreg, DecoratorSet decorators = 0);
void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
Register thread_tmp = noreg, DecoratorSet decorators = 0);
void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
- Register tmp2 = noreg, DecoratorSet decorators = 0);
+ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);
// Used for storing NULL. All other oop constants should be
// stored using routines that take a jobject.
@@ -521,9 +522,34 @@ class MacroAssembler: public Assembler {
// Round up to a power of two
void round_to(Register reg, int modulus);
- // Callee saved registers handling
- void push_callee_saved_registers();
- void pop_callee_saved_registers();
+private:
+  // General purpose and XMM registers potentially clobbered by native code; there
+  // is no need for FPU or AVX opmask related methods because in C1/interpreter we
+  // - always save/restore the FPU state as a whole
+  // - do not care about the AVX-512 opmask registers
+ static RegSet call_clobbered_gp_registers();
+ static XMMRegSet call_clobbered_xmm_registers();
+
+ void push_set(XMMRegSet set, int offset);
+ void pop_set(XMMRegSet set, int offset);
+
+public:
+ void push_set(RegSet set, int offset = -1);
+ void pop_set(RegSet set, int offset = -1);
+
+ // Push and pop everything that might be clobbered by a native
+ // runtime call.
+ // Only save the lower 64 bits of each vector register.
+  // Additional registers can be excluded via the passed RegSet.
+ void push_call_clobbered_registers_except(RegSet exclude, bool save_fpu = true);
+ void pop_call_clobbered_registers_except(RegSet exclude, bool restore_fpu = true);
+
+ void push_call_clobbered_registers(bool save_fpu = true) {
+ push_call_clobbered_registers_except(RegSet(), save_fpu);
+ }
+ void pop_call_clobbered_registers(bool restore_fpu = true) {
+ pop_call_clobbered_registers_except(RegSet(), restore_fpu);
+ }
// allocation
void eden_allocate(
@@ -880,7 +906,7 @@ class MacroAssembler: public Assembler {
void fld_x(AddressLiteral src);
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
- void ldmxcsr(AddressLiteral src);
+ void ldmxcsr(AddressLiteral src, Register scratchReg = rscratch1);
#ifdef _LP64
private:
@@ -1968,6 +1994,8 @@ public:
void convert_d2i(Register dst, XMMRegister src);
void convert_f2l(Register dst, XMMRegister src);
void convert_d2l(Register dst, XMMRegister src);
+ void round_double(Register dst, XMMRegister src, Register rtmp, Register rcx);
+ void round_float(Register dst, XMMRegister src, Register rtmp, Register rcx);
void cache_wb(Address line);
void cache_wbsync(bool is_pre);
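Note: a hypothetical use of the exclusion variant (some_runtime_entry and the call site are illustrative only, not part of this patch). The register that carries the call's result is left out of the set so the restore does not overwrite it:

  // rax returns the result of the leaf call, so it is excluded from save/restore.
  __ push_call_clobbered_registers_except(RegSet::of(rax), /* save_fpu */ false);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, some_runtime_entry), c_rarg0);
  __ pop_call_clobbered_registers_except(RegSet::of(rax), /* restore_fpu */ false);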
diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp
index 61af24cf31c52261cbb2fd7149c880f1a22373fb..9711bc8c2c4368d36d616aacb13c54511ac67fe7 100644
--- a/src/hotspot/cpu/x86/matcher_x86.hpp
+++ b/src/hotspot/cpu/x86/matcher_x86.hpp
@@ -183,4 +183,13 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = true;
+ // Returns pre-selection estimated cost of a vector operation.
+ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
+ switch(vopc) {
+ default: return 0;
+ case Op_PopCountVI: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
+ case Op_PopCountVL: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
+ }
+ }
+
#endif // CPU_X86_MATCHER_X86_HPP
diff --git a/src/hotspot/cpu/x86/register_x86.cpp b/src/hotspot/cpu/x86/register_x86.cpp
index 2a07a9eb19c3b670cb8e86f59aa8e3cdd83fea8f..d86cb1f0820842e31dda08707520efb6bc79eab2 100644
--- a/src/hotspot/cpu/x86/register_x86.cpp
+++ b/src/hotspot/cpu/x86/register_x86.cpp
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+
#include "register_x86.hpp"
#ifndef AMD64
diff --git a/src/hotspot/cpu/x86/register_x86.hpp b/src/hotspot/cpu/x86/register_x86.hpp
index b9ac28902407560b1d03df290b731b4f407ede7c..7bb0be2094395114d921d608c58b4cfab170069d 100644
--- a/src/hotspot/cpu/x86/register_x86.hpp
+++ b/src/hotspot/cpu/x86/register_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,9 @@
#define CPU_X86_REGISTER_X86_HPP
#include "asm/register.hpp"
+#include "runtime/globals.hpp"
+#include "utilities/count_leading_zeros.hpp"
+#include "utilities/powerOfTwo.hpp"
class VMRegImpl;
typedef VMRegImpl* VMReg;
@@ -135,7 +138,7 @@ inline XMMRegister as_XMMRegister(int encoding) {
}
-// The implementation of XMM registers for the IA32 architecture
+// The implementation of XMM registers.
class XMMRegisterImpl: public AbstractRegisterImpl {
public:
enum {
@@ -161,6 +164,18 @@ class XMMRegisterImpl: public AbstractRegisterImpl {
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
const char* name() const;
const char* sub_word_name(int offset) const;
+
+  // Number of XMM registers actually available for use, depending on CPU
+  // capabilities and VM flags.
+ static int available_xmm_registers() {
+ int num_xmm_regs = XMMRegisterImpl::number_of_registers;
+#ifdef _LP64
+ if (UseAVX < 3) {
+ num_xmm_regs /= 2;
+ }
+#endif
+ return num_xmm_regs;
+ }
};
@@ -201,11 +216,7 @@ CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm30, (30));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm31, (31));
#endif // AMD64
-// Only used by the 32bit stubGenerator. These can't be described by vmreg and hence
-// can't be described in oopMaps and therefore can't be used by the compilers (at least
-// were deopt might wan't to see them).
-
-// Use XMMRegister as shortcut
+// Use KRegister as shortcut
class KRegisterImpl;
typedef KRegisterImpl* KRegister;
@@ -213,7 +224,7 @@ inline KRegister as_KRegister(int encoding) {
return (KRegister)(intptr_t)encoding;
}
-// The implementation of XMM registers for the IA32 architecture
+// The implementation of AVX-3 (AVX-512) opmask registers.
class KRegisterImpl : public AbstractRegisterImpl {
public:
enum {
@@ -276,4 +287,33 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
};
+template <>
+inline Register AbstractRegSet<Register>::first() {
+ uint32_t first = _bitset & -_bitset;
+ return first ? as_Register(exact_log2(first)) : noreg;
+}
+
+template <>
+inline Register AbstractRegSet<Register>::last() {
+ if (_bitset == 0) { return noreg; }
+ uint32_t last = 31 - count_leading_zeros(_bitset);
+ return as_Register(last);
+}
+
+template <>
+inline XMMRegister AbstractRegSet<XMMRegister>::first() {
+ uint32_t first = _bitset & -_bitset;
+ return first ? as_XMMRegister(exact_log2(first)) : xnoreg;
+}
+
+template <>
+inline XMMRegister AbstractRegSet<XMMRegister>::last() {
+ if (_bitset == 0) { return xnoreg; }
+ uint32_t last = 31 - count_leading_zeros(_bitset);
+ return as_XMMRegister(last);
+}
+
+typedef AbstractRegSet<Register> RegSet;
+typedef AbstractRegSet<XMMRegister> XMMRegSet;
+
#endif // CPU_X86_REGISTER_X86_HPP
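Note: the first()/last() specializations above rely on two standard bit tricks; the standalone sketch below (GCC/Clang builtins assumed) spells them out. Both builtins are undefined for a zero bitset, which is why the specializations return noreg/xnoreg in that case first:

  #include <cstdint>

  // _bitset & -_bitset isolates the lowest set bit; its log2 is the lowest register index.
  static int lowest_index(uint32_t bitset)  { return __builtin_ctz(bitset); }
  // 31 - count_leading_zeros gives the index of the highest set bit, i.e. the highest register.
  static int highest_index(uint32_t bitset) { return 31 - __builtin_clz(bitset); }

  // Example: bitset 0b101100 -> lowest_index == 2, highest_index == 5.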
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
index 8bfbe3303da9e9a8f1dc0604cdbaaf6329d780c5..75ddae6319974969858e1a76e7e9b21ac2791495 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
@@ -174,10 +174,7 @@ PRAGMA_DIAG_PUSH
PRAGMA_NONNULL_IGNORED
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
int off = 0;
- int num_xmm_regs = XMMRegisterImpl::number_of_registers;
- if (UseAVX < 3) {
- num_xmm_regs = num_xmm_regs/2;
- }
+ int num_xmm_regs = XMMRegisterImpl::available_xmm_registers();
#if COMPILER2_OR_JVMCI
if (save_vectors && UseAVX == 0) {
save_vectors = false; // vectors larger than 16 byte long are supported only with AVX
@@ -367,10 +364,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
PRAGMA_DIAG_POP
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
- int num_xmm_regs = XMMRegisterImpl::number_of_registers;
- if (UseAVX < 3) {
- num_xmm_regs = num_xmm_regs/2;
- }
+ int num_xmm_regs = XMMRegisterImpl::available_xmm_registers();
if (frame::arg_reg_save_area_bytes != 0) {
// Pop arg register save area
__ addptr(rsp, frame::arg_reg_save_area_bytes);
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
index 1525d10e5b5f3dea743318ca097dd1b3098dd6f5..24cfc237b23591e90d3633e26529c4c1b5051d09 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
@@ -588,6 +588,30 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ address generate_popcount_avx_lut(const char *stub_name) {
+ __ align64();
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+ __ emit_data(0x02010100, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x04030302, relocInfo::none, 0);
+ __ emit_data(0x02010100, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x04030302, relocInfo::none, 0);
+ __ emit_data(0x02010100, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x04030302, relocInfo::none, 0);
+ __ emit_data(0x02010100, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x03020201, relocInfo::none, 0);
+ __ emit_data(0x04030302, relocInfo::none, 0);
+ return start;
+ }
+
+
address generate_iota_indices(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@@ -4004,6 +4028,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x00000001);
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
+ if (UsePopCountInstruction && VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
+      // LUT implementation influenced by the counting-1s algorithm from section 5-1 of Hacker's Delight.
+ StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut("popcount_lut");
+ }
+
// support for verify_oop (must happen after universe_init)
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index 8b7188ca42c88ff68959b723b177f950d8fa87bc..39d5cbe2fb4638c4e5104bc53c86a13848e85b4c 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -795,6 +795,21 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ address generate_popcount_avx_lut(const char *stub_name) {
+ __ align64();
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+ __ emit_data64(0x0302020102010100, relocInfo::none);
+ __ emit_data64(0x0403030203020201, relocInfo::none);
+ __ emit_data64(0x0302020102010100, relocInfo::none);
+ __ emit_data64(0x0403030203020201, relocInfo::none);
+ __ emit_data64(0x0302020102010100, relocInfo::none);
+ __ emit_data64(0x0403030203020201, relocInfo::none);
+ __ emit_data64(0x0302020102010100, relocInfo::none);
+ __ emit_data64(0x0403030203020201, relocInfo::none);
+ return start;
+ }
+
address generate_iota_indices(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@@ -2833,7 +2848,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
__ BIND(L_store_element);
- __ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, AS_RAW); // store the oop
+ __ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, noreg, AS_RAW); // store the oop
__ increment(count); // increment the count toward zero
__ jcc(Assembler::zero, L_do_card_marks);
@@ -7713,6 +7728,11 @@ address generate_avx_ghash_processBlocks() {
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
+ if (UsePopCountInstruction && VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
+    // LUT implementation influenced by the counting-1s algorithm from section 5-1 of Hacker's Delight.
+ StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut("popcount_lut");
+ }
+
// support for verify_oop (must happen after universe_init)
if (VerifyOops) {
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
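Note: the 64-byte constant emitted above is a per-nibble bit-count table consumed by the vpshufb-based vector popcount. A scalar sketch of the same idea (the stub applies it to a whole vector register at a time):

  #include <cstdint>

  // One entry per nibble value 0..15 -- exactly the bytes the stub emits,
  // replicated so every 128-bit lane of the vector register sees the table.
  static const uint8_t NIBBLE_POPCNT[16] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
  };

  static inline int popcount_byte(uint8_t b) {
    return NIBBLE_POPCNT[b & 0x0f]    // low-nibble lookup (vpshufb on the masked input)
         + NIBBLE_POPCNT[b >> 4];     // high-nibble lookup (vpshufb after shifting right by 4)
  }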
diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.cpp b/src/hotspot/cpu/x86/stubRoutines_x86.cpp
index 81362c76bd69472828ffcac5040d3d8a5dd1fcd8..f5a0eb623d0d269c89bd905fee044ff47435a5e1 100644
--- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp
@@ -59,6 +59,7 @@ address StubRoutines::x86::_vector_double_sign_flip = NULL;
address StubRoutines::x86::_vector_byte_perm_mask = NULL;
address StubRoutines::x86::_vector_long_sign_mask = NULL;
address StubRoutines::x86::_vector_iota_indices = NULL;
+address StubRoutines::x86::_vector_popcount_lut = NULL;
address StubRoutines::x86::_vector_32_bit_mask = NULL;
address StubRoutines::x86::_vector_64_bit_mask = NULL;
#ifdef _LP64
diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp
index e4dd9550ce28343e24917176eababb5913835137..5119dde4fd5427a866036ba95425c4116805ce15 100644
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp
@@ -177,6 +177,7 @@ class x86 {
static address _vector_short_shuffle_mask;
static address _vector_long_shuffle_mask;
static address _vector_iota_indices;
+ static address _vector_popcount_lut;
#ifdef _LP64
static juint _k256_W[];
static address _k256_W_adr;
@@ -340,6 +341,9 @@ class x86 {
return _vector_iota_indices;
}
+ static address vector_popcount_lut() {
+ return _vector_popcount_lut;
+ }
#ifdef _LP64
static address k256_W_addr() { return _k256_W_adr; }
static address k512_W_addr() { return _k512_W_addr; }
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
index ca7bcd8e50ec49d707f55f56589f83f37d6a046c..7b14aff6f1f6e2f7c71b8cd18e9576262a47e20c 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
@@ -388,7 +388,6 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(
void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) {
Label done;
// Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not.
- int increment = InvocationCounter::count_increment;
Label no_mdo;
if (ProfileInterpreter) {
// Are we profiling?
@@ -399,7 +398,7 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) {
const Address mdo_invocation_counter(rax, in_bytes(MethodData::invocation_counter_offset()) +
in_bytes(InvocationCounter::counter_offset()));
const Address mask(rax, in_bytes(MethodData::invoke_mask_offset()));
- __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rcx, false, Assembler::zero, overflow);
+ __ increment_mask_and_jump(mdo_invocation_counter, mask, rcx, overflow);
__ jmp(done);
}
__ bind(no_mdo);
@@ -409,8 +408,7 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) {
InvocationCounter::counter_offset());
__ get_method_counters(rbx, rax, done);
const Address mask(rax, in_bytes(MethodCounters::invoke_mask_offset()));
- __ increment_mask_and_jump(invocation_counter, increment, mask, rcx,
- false, Assembler::zero, overflow);
+ __ increment_mask_and_jump(invocation_counter, mask, rcx, overflow);
__ bind(done);
}
@@ -755,8 +753,8 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
__ bang_stack_with_offset(p*page_size);
}
- // Record a new watermark, unless the update is above the safe limit.
- // Otherwise, the next time around a check above would pass the safe limit.
+  // Record the new watermark, but only if the update is above the safe limit.
+ // Otherwise, the next time around the check above would pass the safe limit.
__ cmpptr(rsp, Address(thread, JavaThread::shadow_zone_safe_limit()));
__ jccb(Assembler::belowEqual, L_done);
__ movptr(Address(thread, JavaThread::shadow_zone_growth_watermark()), rsp);
diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp
index 0532fb17785c0faffd5a32bccec88edc19705e70..531ff7956b4bc86007977c5a8c4148f88dde12ef 100644
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp
@@ -152,7 +152,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
Register val,
DecoratorSet decorators = 0) {
assert(val == noreg || val == rax, "parameter is just for looks");
- __ store_heap_oop(dst, val, rdx, rbx, decorators);
+ __ store_heap_oop(dst, val, rdx, rbx, LP64_ONLY(r8) NOT_LP64(rsi), decorators);
}
static void do_oop_load(InterpreterMacroAssembler* _masm,
@@ -1067,7 +1067,7 @@ void TemplateTable::iastore() {
__ access_store_at(T_INT, IN_HEAP | IS_ARRAY,
Address(rdx, rbx, Address::times_4,
arrayOopDesc::base_offset_in_bytes(T_INT)),
- rax, noreg, noreg);
+ rax, noreg, noreg, noreg);
}
void TemplateTable::lastore() {
@@ -1081,7 +1081,7 @@ void TemplateTable::lastore() {
__ access_store_at(T_LONG, IN_HEAP | IS_ARRAY,
Address(rcx, rbx, Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_LONG)),
- noreg /* ltos */, noreg, noreg);
+ noreg /* ltos */, noreg, noreg, noreg);
}
@@ -1095,7 +1095,7 @@ void TemplateTable::fastore() {
__ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY,
Address(rdx, rbx, Address::times_4,
arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
- noreg /* ftos */, noreg, noreg);
+ noreg /* ftos */, noreg, noreg, noreg);
}
void TemplateTable::dastore() {
@@ -1108,7 +1108,7 @@ void TemplateTable::dastore() {
__ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY,
Address(rdx, rbx, Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
- noreg /* dtos */, noreg, noreg);
+ noreg /* dtos */, noreg, noreg, noreg);
}
void TemplateTable::aastore() {
@@ -1186,7 +1186,7 @@ void TemplateTable::bastore() {
__ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY,
Address(rdx, rbx,Address::times_1,
arrayOopDesc::base_offset_in_bytes(T_BYTE)),
- rax, noreg, noreg);
+ rax, noreg, noreg, noreg);
}
void TemplateTable::castore() {
@@ -1199,7 +1199,7 @@ void TemplateTable::castore() {
__ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY,
Address(rdx, rbx, Address::times_2,
arrayOopDesc::base_offset_in_bytes(T_CHAR)),
- rax, noreg, noreg);
+ rax, noreg, noreg, noreg);
}
@@ -2197,7 +2197,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ bind(has_counters);
Label no_mdo;
- int increment = InvocationCounter::count_increment;
if (ProfileInterpreter) {
// Are we profiling?
__ movptr(rbx, Address(rcx, in_bytes(Method::method_data_offset())));
@@ -2207,7 +2206,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
const Address mdo_backedge_counter(rbx, in_bytes(MethodData::backedge_counter_offset()) +
in_bytes(InvocationCounter::counter_offset()));
const Address mask(rbx, in_bytes(MethodData::backedge_mask_offset()));
- __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, rax, false, Assembler::zero,
+ __ increment_mask_and_jump(mdo_backedge_counter, mask, rax,
UseOnStackReplacement ? &backedge_counter_overflow : NULL);
__ jmp(dispatch);
}
@@ -2215,8 +2214,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Increment backedge counter in MethodCounters*
__ movptr(rcx, Address(rcx, Method::method_counters_offset()));
const Address mask(rcx, in_bytes(MethodCounters::backedge_mask_offset()));
- __ increment_mask_and_jump(Address(rcx, be_offset), increment, mask,
- rax, false, Assembler::zero, UseOnStackReplacement ? &backedge_counter_overflow : NULL);
+ __ increment_mask_and_jump(Address(rcx, be_offset), mask, rax,
+ UseOnStackReplacement ? &backedge_counter_overflow : NULL);
__ bind(dispatch);
}
@@ -3102,7 +3101,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
{
__ pop(btos);
if (!is_static) pop_and_check_object(obj);
- __ access_store_at(T_BYTE, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_BYTE, IN_HEAP, field, rax, noreg, noreg, noreg);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no);
}
@@ -3117,7 +3116,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
{
__ pop(ztos);
if (!is_static) pop_and_check_object(obj);
- __ access_store_at(T_BOOLEAN, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, rax, noreg, noreg, noreg);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_zputfield, bc, rbx, true, byte_no);
}
@@ -3148,7 +3147,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
{
__ pop(itos);
if (!is_static) pop_and_check_object(obj);
- __ access_store_at(T_INT, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_INT, IN_HEAP, field, rax, noreg, noreg, noreg);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no);
}
@@ -3163,7 +3162,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
{
__ pop(ctos);
if (!is_static) pop_and_check_object(obj);
- __ access_store_at(T_CHAR, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_CHAR, IN_HEAP, field, rax, noreg, noreg, noreg);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no);
}
@@ -3178,7 +3177,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
{
__ pop(stos);
if (!is_static) pop_and_check_object(obj);
- __ access_store_at(T_SHORT, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_SHORT, IN_HEAP, field, rax, noreg, noreg, noreg);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no);
}
@@ -3194,7 +3193,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
__ pop(ltos);
if (!is_static) pop_and_check_object(obj);
// MO_RELAXED: generate atomic store for the case of volatile field (important for x86_32)
- __ access_store_at(T_LONG, IN_HEAP | MO_RELAXED, field, noreg /* ltos*/, noreg, noreg);
+ __ access_store_at(T_LONG, IN_HEAP | MO_RELAXED, field, noreg /* ltos*/, noreg, noreg, noreg);
#ifdef _LP64
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no);
@@ -3211,7 +3210,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
{
__ pop(ftos);
if (!is_static) pop_and_check_object(obj);
- __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg);
+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
}
@@ -3230,7 +3229,7 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri
__ pop(dtos);
if (!is_static) pop_and_check_object(obj);
// MO_RELAXED: for the case of volatile field, in fact it adds no extra work for the underlying implementation
- __ access_store_at(T_DOUBLE, IN_HEAP | MO_RELAXED, field, noreg /* dtos */, noreg, noreg);
+ __ access_store_at(T_DOUBLE, IN_HEAP | MO_RELAXED, field, noreg /* dtos */, noreg, noreg, noreg);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
}
@@ -3373,31 +3372,31 @@ void TemplateTable::fast_storefield_helper(Address field, Register rax) {
break;
case Bytecodes::_fast_lputfield:
#ifdef _LP64
- __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos */, noreg, noreg);
+ __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos */, noreg, noreg, noreg);
#else
__ stop("should not be rewritten");
#endif
break;
case Bytecodes::_fast_iputfield:
- __ access_store_at(T_INT, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_INT, IN_HEAP, field, rax, noreg, noreg, noreg);
break;
case Bytecodes::_fast_zputfield:
- __ access_store_at(T_BOOLEAN, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, rax, noreg, noreg, noreg);
break;
case Bytecodes::_fast_bputfield:
- __ access_store_at(T_BYTE, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_BYTE, IN_HEAP, field, rax, noreg, noreg, noreg);
break;
case Bytecodes::_fast_sputfield:
- __ access_store_at(T_SHORT, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_SHORT, IN_HEAP, field, rax, noreg, noreg, noreg);
break;
case Bytecodes::_fast_cputfield:
- __ access_store_at(T_CHAR, IN_HEAP, field, rax, noreg, noreg);
+ __ access_store_at(T_CHAR, IN_HEAP, field, rax, noreg, noreg, noreg);
break;
case Bytecodes::_fast_fputfield:
- __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos*/, noreg, noreg);
+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos*/, noreg, noreg, noreg);
break;
case Bytecodes::_fast_dputfield:
- __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos*/, noreg, noreg);
+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos*/, noreg, noreg, noreg);
break;
default:
ShouldNotReachHere();
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
index 2fd1bbc9617002e6c84cb8f5e76fe090ef1b438c..2f4e31b4708ec8e9c777584dd5e1e5ea65111133 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -1044,6 +1044,25 @@ public:
static bool supports_clflushopt() { return ((_features & CPU_FLUSHOPT) != 0); }
static bool supports_clwb() { return ((_features & CPU_CLWB) != 0); }
+  // Old CPUs perform lea on the AGU, which causes additional latency when
+  // transferring the value from/to the ALU for other operations.
+ static bool supports_fast_2op_lea() {
+ return (is_intel() && supports_avx()) || // Sandy Bridge and above
+ (is_amd() && supports_avx()); // Jaguar and Bulldozer and above
+ }
+
+  // Pre-Icelake Intel CPUs suffer from an inefficient 3-operand lea, which contains
+  // all of base register, index register and displacement immediate, and has a
+  // latency of 3. Note that when the address contains no displacement but the base
+  // register is rbp or r13, the machine code must contain a zero displacement
+  // immediate, effectively turning a 2-operand lea into a 3-operand lea. Such a lea
+  // can be replaced by add-add or lea-add.
+ static bool supports_fast_3op_lea() {
+ return supports_fast_2op_lea() &&
+ ((is_intel() && supports_clwb() && !is_intel_skylake()) || // Icelake and above
+ is_amd());
+ }
+
#ifdef __APPLE__
// Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64)
static bool is_cpu_emulated();
diff --git a/src/hotspot/cpu/x86/vmreg_x86.hpp b/src/hotspot/cpu/x86/vmreg_x86.hpp
index 58df28f8491b96a3df528aa14f9f4ff8b5574ef9..a2d44a2cebed2f4844a0ff0fee0988b475aa8eb7 100644
--- a/src/hotspot/cpu/x86/vmreg_x86.hpp
+++ b/src/hotspot/cpu/x86/vmreg_x86.hpp
@@ -25,7 +25,7 @@
#ifndef CPU_X86_VMREG_X86_HPP
#define CPU_X86_VMREG_X86_HPP
-
+#include "register_x86.hpp"
inline bool is_Register() {
return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;
@@ -36,14 +36,8 @@ inline bool is_FloatRegister() {
}
inline bool is_XMMRegister() {
- int uarch_max_xmm = ConcreteRegisterImpl::max_xmm;
-
-#ifdef _LP64
- if (UseAVX < 3) {
- int half_xmm = (XMMRegisterImpl::max_slots_per_register * XMMRegisterImpl::number_of_registers) / 2;
- uarch_max_xmm -= half_xmm;
- }
-#endif
+ int uarch_max_xmm = ConcreteRegisterImpl::max_fpr +
+ (XMMRegisterImpl::max_slots_per_register * XMMRegisterImpl::available_xmm_registers());
return (value() >= ConcreteRegisterImpl::max_fpr && value() < uarch_max_xmm);
}
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 7ff67e9a085562722946e6c04f95cb18e4136fb6..cf182d9880d382a147b1276cef7b3457512e2d68 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1405,8 +1405,12 @@ const bool Matcher::match_rule_supported(int opcode) {
}
break;
case Op_PopCountVI:
+ if (!UsePopCountInstruction || (UseAVX < 2)) {
+ return false;
+ }
+ break;
case Op_PopCountVL:
- if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
+ if (!UsePopCountInstruction || (UseAVX <= 2)) {
return false;
}
break;
@@ -1464,6 +1468,16 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
+ case Op_RoundVF:
+ if (UseAVX < 2) { // enabled for AVX2 only
+ return false;
+ }
+ break;
+ case Op_RoundVD:
+ if (UseAVX < 3) {
+ return false; // enabled for AVX3 only
+ }
+ break;
case Op_CompareAndSwapL:
#ifdef _LP64
case Op_CompareAndSwapP:
@@ -1568,6 +1582,12 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
+ case Op_RoundF:
+ case Op_RoundD:
+ if (!is_LP64) {
+ return false;
+ }
+ break;
case Op_CopySignD:
case Op_CopySignF:
if (UseAVX < 3 || !is_LP64) {
@@ -1813,6 +1833,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
+ case Op_RoundVD:
+ if (!VM_Version::supports_avx512dq()) {
+ return false;
+ }
+ break;
case Op_VectorCastF2X:
if (is_subword_type(bt) || bt == T_LONG) {
return false;
@@ -1861,6 +1886,18 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
+ case Op_PopCountVI:
+ if (!VM_Version::supports_avx512_vpopcntdq() &&
+ (vlen == 16) && !VM_Version::supports_avx512bw()) {
+ return false;
+ }
+ break;
+ case Op_PopCountVL:
+ if (!VM_Version::supports_avx512_vpopcntdq() &&
+ ((vlen <= 4) || ((vlen == 8) && !VM_Version::supports_avx512bw()))) {
+ return false;
+ }
+ break;
}
return true; // Per default match rules are supported.
}
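
The relaxed PopCountVI/PopCountVL predicates above, together with the new fallback rules later in this file, allow vector popcount without AVX-512 VPOPCNTDQ. As a rough per-lane reference (illustrative only; the real vector_popcount_int/long code lives in the macro assembler), a scalar SWAR popcount:

    #include <cstdint>
    #include <cstdio>

    // SWAR popcount of one 32-bit lane; a vector fallback applies the same kind of
    // bit arithmetic lane-wise when the vpopcntd/q instructions are unavailable.
    static uint32_t popcount32(uint32_t x) {
      x = x - ((x >> 1) & 0x55555555u);                  // 2-bit partial sums
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit partial sums
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit partial sums
      return (x * 0x01010101u) >> 24;                    // fold bytes into the top byte
    }

    int main() {
      printf("%u %u\n", popcount32(0xF0F0F0F0u), popcount32(1u)); // 16 1
      return 0;
    }
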
@@ -7157,13 +7194,14 @@ instruct vcastFtoD_reg(vec dst, vec src) %{
ins_pipe( pipe_slow );
%}
-instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
+
+instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512vl() &&
Matcher::vector_length_in_bytes(n) < 64 &&
Matcher::vector_element_basic_type(n) == T_INT);
match(Set dst (VectorCastF2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr);
- format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
+ format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
@@ -7173,13 +7211,13 @@ instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, ve
ins_pipe( pipe_slow );
%}
-instruct vcastFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
+instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate((VM_Version::supports_avx512vl() ||
Matcher::vector_length_in_bytes(n) == 64) &&
Matcher::vector_element_basic_type(n) == T_INT);
match(Set dst (VectorCastF2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
- format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
+ format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
@@ -7200,11 +7238,11 @@ instruct vcastDtoF_reg(vec dst, vec src) %{
ins_pipe( pipe_slow );
%}
-instruct vcastDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
+instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
match(Set dst (VectorCastD2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
- format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
+ format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
@@ -7230,6 +7268,56 @@ instruct vucast(vec dst, vec src) %{
ins_pipe( pipe_slow );
%}
+#ifdef _LP64
+instruct vround_float_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
+ predicate(!VM_Version::supports_avx512vl() &&
+ Matcher::vector_length_in_bytes(n) < 64 &&
+ Matcher::vector_element_basic_type(n) == T_INT);
+ match(Set dst (RoundVF src));
+ effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr);
+ format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
+ __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+ $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
+ ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vround_float_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
+ predicate((VM_Version::supports_avx512vl() ||
+ Matcher::vector_length_in_bytes(n) == 64) &&
+ Matcher::vector_element_basic_type(n) == T_INT);
+ match(Set dst (RoundVF src));
+ effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
+ format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
+ __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+ $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
+ ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vround_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
+ predicate(Matcher::vector_element_basic_type(n) == T_LONG);
+ match(Set dst (RoundVD src));
+ effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
+ format %{ "vector_round_long $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
+ __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
+ $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
+ ExternalAddress(vector_double_signflip()), new_mxcsr, $scratch$$Register, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+#endif
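
The RoundVF/RoundVD rules above, and the scalar RoundF/RoundD rules later in this patch, implement Math.round. As a scalar reference for the semantics involved (nearest value with ties toward positive infinity, NaN mapping to 0, saturation at the integer range, as assumed from the Java specification rather than taken from this patch), a sketch:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Reference semantics for Math.round(float): nearest int, ties toward +infinity,
    // NaN -> 0, saturation at the int range. The double intermediate is exact for floats.
    static int32_t java_round_float(float x) {
      if (std::isnan(x)) return 0;
      double r = std::floor(static_cast<double>(x) + 0.5);
      if (r <= static_cast<double>(std::numeric_limits<int32_t>::min())) return std::numeric_limits<int32_t>::min();
      if (r >= static_cast<double>(std::numeric_limits<int32_t>::max())) return std::numeric_limits<int32_t>::max();
      return static_cast<int32_t>(r);
    }

    int main() {
      printf("%d %d %d\n", (int)java_round_float(2.5f), (int)java_round_float(-2.5f),
             (int)java_round_float(NAN)); // 3 -2 0
      return 0;
    }
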
// --------------------------------- VectorMaskCmp --------------------------------------
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
@@ -8571,28 +8659,54 @@ instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
// --------------------------------- PopCount --------------------------------------
-instruct vpopcountI(vec dst, vec src) %{
+instruct vpopcountI_popcntd(vec dst, vec src) %{
+ predicate(VM_Version::supports_avx512_vpopcntdq());
match(Set dst (PopCountVI src));
- format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
+ format %{ "vector_popcount_int $dst, $src\t! vector popcount packedI" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
+ int vlen_enc = vector_length_encoding(this);
+ __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+instruct vpopcountI(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
+ predicate(!VM_Version::supports_avx512_vpopcntdq());
+ match(Set dst (PopCountVI src));
+ effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
+ format %{ "vector_popcount_int $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
+ ins_encode %{
+ assert(UsePopCountInstruction, "not enabled");
int vlen_enc = vector_length_encoding(this);
- __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
+ $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
-instruct vpopcountL(vec dst, vec src) %{
+instruct vpopcountL_popcntd(vec dst, vec src) %{
+ predicate(VM_Version::supports_avx512_vpopcntdq());
match(Set dst (PopCountVL src));
- format %{ "vpopcntq $dst,$src\t! vector popcount packedL" %}
+ format %{ "vector_popcount_long $dst, $src\t! vector popcount packedL" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
-
int vlen_enc = vector_length_encoding(this, $src);
- __ vpopcntq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
- __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+instruct vpopcountL(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
+ predicate(!VM_Version::supports_avx512_vpopcntdq());
+ match(Set dst (PopCountVL src));
+ effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
+ format %{ "vector_popcount_long $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
+ ins_encode %{
+ assert(UsePopCountInstruction, "not enabled");
+ int vlen_enc = vector_length_encoding(this, $src);
+ __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
+ $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad
index 3bb01e3eeaeb81f7bc9d27df43a9923601c3fe7d..9bba150516ed134ae16e34b52cf97242f2ef22e7 100644
--- a/src/hotspot/cpu/x86/x86_32.ad
+++ b/src/hotspot/cpu/x86/x86_32.ad
@@ -12122,34 +12122,34 @@ instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
ins_pipe( pipe_slow );
%}
-instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
- regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
+instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
+ regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
- match(Set result (HasNegatives ary1 len));
+ match(Set result (CountPositives ary1 len));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
- format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
+ format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
- __ has_negatives($ary1$$Register, $len$$Register,
- $result$$Register, $tmp3$$Register,
- $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
+ __ count_positives($ary1$$Register, $len$$Register,
+ $result$$Register, $tmp3$$Register,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
%}
ins_pipe( pipe_slow );
%}
-instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
- regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
+instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
+ regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
- match(Set result (HasNegatives ary1 len));
+ match(Set result (CountPositives ary1 len));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
- format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
+ format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
- __ has_negatives($ary1$$Register, $len$$Register,
- $result$$Register, $tmp3$$Register,
- $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
+ __ count_positives($ary1$$Register, $len$$Register,
+ $result$$Register, $tmp3$$Register,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
%}
ins_pipe( pipe_slow );
%}
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index fbf71300dcd6b6da8a0dc4b034f0c152dfdcdaae..62132ea930f867ab04f8d9010e7d1dfbd0e60dfd 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -241,6 +241,11 @@ reg_class long_no_rcx_reg %{
return _LONG_NO_RCX_REG_mask;
%}
+// Class for all long registers (excluding RBP and R13)
+reg_class long_no_rbp_r13_reg %{
+ return _LONG_NO_RBP_R13_REG_mask;
+%}
+
// Class for all int registers (excluding RSP)
reg_class int_reg %{
return _INT_REG_mask;
@@ -256,6 +261,11 @@ reg_class int_no_rcx_reg %{
return _INT_NO_RCX_REG_mask;
%}
+// Class for all int registers (excluding RBP and R13)
+reg_class int_no_rbp_r13_reg %{
+ return _INT_NO_RBP_R13_REG_mask;
+%}
+
// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);
@@ -319,9 +329,11 @@ extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
+extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
+extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;
extern RegMask _STACK_OR_PTR_REG_mask;
@@ -348,9 +360,11 @@ RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
+RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
+RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
@@ -409,6 +423,12 @@ void reg_mask_init() {
_LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
_LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
+ _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
+ _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
+ _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
+ _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
+ _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
+
_INT_REG_mask = _ALL_INT_REG_mask;
if (PreserveFramePointer) {
_INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
@@ -427,6 +447,10 @@ void reg_mask_init() {
_INT_NO_RCX_REG_mask = _INT_REG_mask;
_INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
+ _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
+ _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
+ _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
+
// _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
// from the float_reg_legacy/float_reg_evex register class.
_FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
@@ -1926,7 +1950,7 @@ encode %{
Label done;
// cmp $0x80000000,%eax
- __ cmp(as_Register(RAX_enc), 0x80000000);
+ __ cmpl(as_Register(RAX_enc), 0x80000000);
// jne e
__ jccb(Assembler::notEqual, normal);
@@ -3491,6 +3515,21 @@ operand no_rax_rdx_RegI()
interface(REG_INTER);
%}
+operand no_rbp_r13_RegI()
+%{
+ constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
+ match(RegI);
+ match(rRegI);
+ match(rax_RegI);
+ match(rbx_RegI);
+ match(rcx_RegI);
+ match(rdx_RegI);
+ match(rdi_RegI);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Pointer Register
operand any_RegP()
%{
@@ -3718,6 +3757,19 @@ operand rdx_RegL()
interface(REG_INTER);
%}
+operand no_rbp_r13_RegL()
+%{
+ constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
+ match(RegL);
+ match(rRegL);
+ match(rax_RegL);
+ match(rcx_RegL);
+ match(rdx_RegL);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Flags register, used as output of compare instructions
operand rFlagsReg()
%{
@@ -7443,14 +7495,53 @@ instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
-instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
+instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
- match(Set dst (AddI src0 src1));
+ predicate(VM_Version::supports_fast_2op_lea());
+ match(Set dst (AddI (LShiftI index scale) disp));
- ins_cost(110);
- format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
+ format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
ins_encode %{
- __ leal($dst$$Register, Address($src0$$Register, $src1$$constant));
+ Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
+ __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
+%{
+ predicate(VM_Version::supports_fast_3op_lea());
+ match(Set dst (AddI (AddI base index) disp));
+
+ format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
+ ins_encode %{
+ __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
+%{
+ predicate(VM_Version::supports_fast_2op_lea());
+ match(Set dst (AddI base (LShiftI index scale)));
+
+ format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
+ ins_encode %{
+ Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
+ __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
+%{
+ predicate(VM_Version::supports_fast_3op_lea());
+ match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
+
+ format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
+ ins_encode %{
+ Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
+ __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
%}
ins_pipe(ialu_reg_reg);
%}
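
For orientation, a hypothetical source-level expression of the shape the new leaI_rReg_rReg_immI2_immI rule matches, (AddI (AddI base (LShiftI index scale)) disp), written as plain C++:

    #include <cstdint>
    #include <cstdio>

    // base + (index << 2) + 16 has the (AddI (AddI base (LShiftI index scale)) disp)
    // shape; with fast 3-operand lea it becomes a single leal, otherwise the 2-operand
    // lea rules plus an add cover it.
    static int32_t addr_like(int32_t base, int32_t index) {
      return base + (index << 2) + 16;
    }

    int main() {
      printf("%d\n", (int)addr_like(100, 5)); // 136
      return 0;
    }
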
@@ -7574,14 +7665,53 @@ instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
-instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
+instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
- match(Set dst (AddL src0 src1));
+ predicate(VM_Version::supports_fast_2op_lea());
+ match(Set dst (AddL (LShiftL index scale) disp));
- ins_cost(110);
- format %{ "leaq $dst, [$src0 + $src1]\t# long" %}
+ format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
+ ins_encode %{
+ Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
+ __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
+%{
+ predicate(VM_Version::supports_fast_3op_lea());
+ match(Set dst (AddL (AddL base index) disp));
+
+ format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
+ ins_encode %{
+ __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
+%{
+ predicate(VM_Version::supports_fast_2op_lea());
+ match(Set dst (AddL base (LShiftL index scale)));
+
+ format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
+ ins_encode %{
+ Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
+ __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
+ %}
+ ins_pipe(ialu_reg_reg);
+%}
+
+instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
+%{
+ predicate(VM_Version::supports_fast_3op_lea());
+ match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
+
+ format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
ins_encode %{
- __ leaq($dst$$Register, Address($src0$$Register, $src1$$constant));
+ Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
+ __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
%}
ins_pipe(ialu_reg_reg);
%}
@@ -7612,18 +7742,6 @@ instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
// XXX addP mem ops ????
-instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
-%{
- match(Set dst (AddP src0 src1));
-
- ins_cost(110);
- format %{ "leaq $dst, [$src0 + $src1]\t# ptr" %}
- ins_encode %{
- __ leaq($dst$$Register, Address($src0$$Register, $src1$$constant));
- %}
- ins_pipe(ialu_reg_reg);
-%}
-
instruct checkCastPP(rRegP dst)
%{
match(Set dst (CheckCastPP dst));
@@ -10703,6 +10821,28 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
ins_pipe(pipe_slow);
%}
+instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
+%{
+ match(Set dst (RoundD src));
+ effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
+ format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
+ ins_encode %{
+ __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
+%{
+ match(Set dst (RoundF src));
+ effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
+ format %{ "round_float $dst,$src" %}
+ ins_encode %{
+ __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct convI2F_reg_reg(regF dst, rRegI src)
%{
predicate(!UseXmmI2F);
@@ -11685,34 +11825,34 @@ instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
ins_pipe( pipe_slow );
%}
-instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
- legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
+instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
+ legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
%{
predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
- match(Set result (HasNegatives ary1 len));
+ match(Set result (CountPositives ary1 len));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
- format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
+ format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
- __ has_negatives($ary1$$Register, $len$$Register,
- $result$$Register, $tmp3$$Register,
- $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
+ __ count_positives($ary1$$Register, $len$$Register,
+ $result$$Register, $tmp3$$Register,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
%}
ins_pipe( pipe_slow );
%}
-instruct has_negatives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
- legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr,)
+instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
+ legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr,)
%{
predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
- match(Set result (HasNegatives ary1 len));
+ match(Set result (CountPositives ary1 len));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
- format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
+ format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
- __ has_negatives($ary1$$Register, $len$$Register,
- $result$$Register, $tmp3$$Register,
- $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
+ __ count_positives($ary1$$Register, $len$$Register,
+ $result$$Register, $tmp3$$Register,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
%}
ins_pipe( pipe_slow );
%}
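
The HasNegatives-to-CountPositives rename above switches the intrinsic from a boolean to a count. The scalar sketch below reflects the assumed contract (number of leading non-negative bytes), so the old "has negatives" answer is recovered as result != len:

    #include <cstddef>
    #include <cstdio>

    // Assumed scalar semantics of count_positives: the number of leading
    // non-negative bytes; equal to len exactly when no negative byte occurs.
    static size_t count_positives(const signed char* a, size_t len) {
      size_t i = 0;
      while (i < len && a[i] >= 0) {
        i++;
      }
      return i;
    }

    int main() {
      signed char data[] = {1, 2, 3, -1, 5};
      printf("%zu\n", count_positives(data, 5)); // 3, i.e. != 5, so the range has a negative byte
      return 0;
    }
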
diff --git a/src/hotspot/cpu/zero/frame_zero.inline.hpp b/src/hotspot/cpu/zero/frame_zero.inline.hpp
index 396e189a5db4039fbf03ca242a56919e524d4354..dfca0e4bcb11c4f9bbc08d09a6d791f566430ae5 100644
--- a/src/hotspot/cpu/zero/frame_zero.inline.hpp
+++ b/src/hotspot/cpu/zero/frame_zero.inline.hpp
@@ -82,6 +82,11 @@ inline intptr_t* frame::link() const {
return NULL;
}
+inline intptr_t* frame::link_or_null() const {
+ ShouldNotCallThis();
+ return NULL;
+}
+
inline interpreterState frame::get_interpreterState() const {
return zero_interpreterframe()->interpreter_state();
}
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index 18b908cfc8fc0eec9e0369b342807920521158de..cba539caf425355a5d9357141b3dc7d3fdce114a 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2092,6 +2093,34 @@ bool os::Linux::query_process_memory_info(os::Linux::meminfo_t* info) {
return false;
}
+#ifdef __GLIBC__
+// For Glibc, print a one-liner with the malloc tunables.
+// The most important and popular one is MALLOC_ARENA_MAX, but we are
+// thorough and print them all.
+static void print_glibc_malloc_tunables(outputStream* st) {
+ static const char* var[] = {
+ // the new variant
+ "GLIBC_TUNABLES",
+ // legacy variants
+ "MALLOC_CHECK_", "MALLOC_TOP_PAD_", "MALLOC_PERTURB_",
+ "MALLOC_MMAP_THRESHOLD_", "MALLOC_TRIM_THRESHOLD_",
+ "MALLOC_MMAP_MAX_", "MALLOC_ARENA_TEST", "MALLOC_ARENA_MAX",
+ NULL};
+ st->print("glibc malloc tunables: ");
+ bool printed = false;
+ for (int i = 0; var[i] != NULL; i ++) {
+ const char* const val = ::getenv(var[i]);
+ if (val != NULL) {
+ st->print("%s%s=%s", (printed ? ", " : ""), var[i], val);
+ printed = true;
+ }
+ }
+ if (!printed) {
+ st->print("(default)");
+ }
+}
+#endif // __GLIBC__
+
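
A hypothetical illustration of what the new one-liner can look like at runtime; the tunable name glibc.malloc.arena_max and its value are example settings, not taken from this patch:

    #include <cstdio>
    #include <cstdlib>

    // Hypothetical example: the newer interface carries everything in GLIBC_TUNABLES
    // (e.g. glibc.malloc.arena_max=2), while the legacy MALLOC_* variables are separate.
    int main() {
      setenv("GLIBC_TUNABLES", "glibc.malloc.arena_max=2", 1);
      const char* v = getenv("GLIBC_TUNABLES");
      // The VM's one-liner would then read something like:
      printf("glibc malloc tunables: GLIBC_TUNABLES=%s\n", v ? v : "(default)");
      return 0;
    }
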
void os::Linux::print_process_memory_info(outputStream* st) {
st->print_cr("Process Memory:");
@@ -2114,8 +2143,9 @@ void os::Linux::print_process_memory_info(outputStream* st) {
st->print_cr("Could not open /proc/self/status to get process memory related information");
}
- // Print glibc outstanding allocations.
- // (note: there is no implementation of mallinfo for muslc)
+ // glibc only:
+ // - Print outstanding allocations using mallinfo
+ // - Print glibc tunables
#ifdef __GLIBC__
size_t total_allocated = 0;
bool might_have_wrapped = false;
@@ -2123,9 +2153,10 @@ void os::Linux::print_process_memory_info(outputStream* st) {
struct glibc_mallinfo2 mi = _mallinfo2();
total_allocated = mi.uordblks;
} else if (_mallinfo != NULL) {
- // mallinfo is an old API. Member names mean next to nothing and, beyond that, are int.
- // So values may have wrapped around. Still useful enough to see how much glibc thinks
- // we allocated.
+ // mallinfo is an old API. Member names mean next to nothing and, beyond that, the members
+ // are 32-bit signed, so for larger footprints the values may have wrapped around. We try to
+ // detect this here: if the process's total resident set size is smaller than 4G, the malloc
+ // footprint has to be smaller than that too and the numbers are reliable.
struct glibc_mallinfo mi = _mallinfo();
total_allocated = (size_t)(unsigned)mi.uordblks;
// Since mallinfo members are int, glibc values may have wrapped. Warn about this.
@@ -2136,8 +2167,10 @@ void os::Linux::print_process_memory_info(outputStream* st) {
total_allocated / K,
might_have_wrapped ? " (may have wrapped)" : "");
}
-#endif // __GLIBC__
-
+ // Tunables
+ print_glibc_malloc_tunables(st);
+ st->cr();
+#endif
}
bool os::Linux::print_ld_preload_file(outputStream* st) {
@@ -2463,6 +2496,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) {
strncpy(cpuinfo, "IA64", length);
#elif defined(PPC)
strncpy(cpuinfo, "PPC64", length);
+#elif defined(RISCV)
+ strncpy(cpuinfo, "RISCV64", length);
#elif defined(S390)
strncpy(cpuinfo, "S390", length);
#elif defined(SPARC)
@@ -3939,23 +3974,14 @@ char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment,
return addr;
}
-static void warn_on_commit_special_failure(char* req_addr, size_t bytes,
+static void log_on_commit_special_failure(char* req_addr, size_t bytes,
size_t page_size, int error) {
assert(error == ENOMEM, "Only expect to fail if no memory is available");
- bool warn_on_failure = UseLargePages &&
- (!FLAG_IS_DEFAULT(UseLargePages) ||
- !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
- !FLAG_IS_DEFAULT(LargePageSizeInBytes));
-
- if (warn_on_failure) {
- char msg[128];
- jio_snprintf(msg, sizeof(msg), "Failed to reserve and commit memory. req_addr: "
- PTR_FORMAT " bytes: " SIZE_FORMAT " page size: "
- SIZE_FORMAT " (errno = %d).",
- req_addr, bytes, page_size, error);
- warning("%s", msg);
- }
+ log_info(pagesize)("Failed to reserve and commit memory with given page size. req_addr: " PTR_FORMAT
+ " size: " SIZE_FORMAT "%s, page size: " SIZE_FORMAT "%s, (errno = %d)",
+ p2i(req_addr), byte_size_in_exact_unit(bytes), exact_unit_for_byte_size(bytes),
+ byte_size_in_exact_unit(page_size), exact_unit_for_byte_size(page_size), error);
}
bool os::Linux::commit_memory_special(size_t bytes,
@@ -3977,7 +4003,7 @@ bool os::Linux::commit_memory_special(size_t bytes,
char* addr = (char*)::mmap(req_addr, bytes, prot, flags, -1, 0);
if (addr == MAP_FAILED) {
- warn_on_commit_special_failure(req_addr, bytes, page_size, errno);
+ log_on_commit_special_failure(req_addr, bytes, page_size, errno);
return false;
}
diff --git a/src/hotspot/os/posix/signals_posix.cpp b/src/hotspot/os/posix/signals_posix.cpp
index 6e94b47712f95f9194a3d4a3a138eec629b9535e..6ec5bbd6c2cd9af117de8fef5b864880218c273e 100644
--- a/src/hotspot/os/posix/signals_posix.cpp
+++ b/src/hotspot/os/posix/signals_posix.cpp
@@ -1244,8 +1244,10 @@ void set_signal_handler(int sig, bool do_check = true) {
}
#endif
- // Save handler setup for later checking
- vm_handlers.set(sig, &sigAct);
+ // Save handler setup for possible later checking
+ if (do_check) {
+ vm_handlers.set(sig, &sigAct);
+ }
do_check_signal_periodically[sig] = do_check;
int ret = sigaction(sig, &sigAct, &oldAct);
diff --git a/src/hotspot/os/windows/attachListener_windows.cpp b/src/hotspot/os/windows/attachListener_windows.cpp
index 710afc410051f627ff45807be7454e948e263a8b..07a214d4352ca693598183e3ed9e762aa55d6654 100644
--- a/src/hotspot/os/windows/attachListener_windows.cpp
+++ b/src/hotspot/os/windows/attachListener_windows.cpp
@@ -154,7 +154,7 @@ class Win32AttachOperation: public AttachOperation {
}
public:
- void Win32AttachOperation::complete(jint result, bufferedStream* result_stream);
+ void complete(jint result, bufferedStream* result_stream);
};
diff --git a/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp
index 37221953a7777e889490ded0daf3d8202e79c542..75ca68e43fb4891e99cb6ca939521a57647a161d 100644
--- a/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp
@@ -41,4 +41,4 @@ class ICache : public AbstractICache {
}
};
-#endif // OS_CPU_LINUX_AARCH64_ICACHE_AARCH64_HPP
\ No newline at end of file
+#endif // OS_CPU_LINUX_AARCH64_ICACHE_AARCH64_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f2610af6cdd0f50db6d50a7de3c7bd8569c5499d
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// nothing required here
diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..761da5d743edd921d92dff9c3a6d059ddad7eb9e
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+
+#include "runtime/vm_version.hpp"
+
+// Implementation of class atomic
+
+// Note that memory_order_conservative requires a full barrier after atomic stores.
+// See https://patchwork.kernel.org/patch/3575821/
+
+template<size_t byte_size>
+struct Atomic::PlatformAdd {
+  template<typename D, typename I>
+ D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
+ }
+
+  template<typename D, typename I>
+ D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const {
+ return add_and_fetch(dest, add_value, order) - add_value;
+ }
+};
+
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
+ T exchange_value,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(byte_size == sizeof(T));
+ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
+}
+
+// __attribute__((unused)) on dest is to get rid of spurious GCC warnings.
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attribute__((unused)),
+ T compare_value,
+ T exchange_value,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(byte_size == sizeof(T));
+ T value = compare_value;
+ if (order != memory_order_relaxed) {
+ FULL_MEM_BARRIER;
+ }
+
+ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+
+ if (order != memory_order_relaxed) {
+ FULL_MEM_BARRIER;
+ }
+ return value;
+}
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)),
+ T compare_value,
+ T exchange_value,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ if (order != memory_order_relaxed) {
+ FULL_MEM_BARRIER;
+ }
+ T rv;
+ int tmp;
+ __asm volatile(
+ "1:\n\t"
+ " addiw %[tmp], %[cv], 0\n\t" // make sure compare_value signed_extend
+ " lr.w.aq %[rv], (%[dest])\n\t"
+ " bne %[rv], %[tmp], 2f\n\t"
+ " sc.w.rl %[tmp], %[ev], (%[dest])\n\t"
+ " bnez %[tmp], 1b\n\t"
+ "2:\n\t"
+ : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
+ : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
+ : "memory");
+ if (order != memory_order_relaxed) {
+ FULL_MEM_BARRIER;
+ }
+ return rv;
+}
+
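
The hand-written lr.w/sc.w loop above mainly controls sign extension of compare_value and fence placement; ordering details aside, it behaves like this builtin-based sketch:

    #include <cstdint>
    #include <cstdio>

    // Functional sketch of the 4-byte cmpxchg: returns the old value, which equals
    // compare_value exactly when the swap happened. The hand-written lr/sc version
    // additionally pins down sign extension of the comparison and fence placement.
    static int32_t cmpxchg32(volatile int32_t* dest, int32_t compare_value, int32_t exchange_value) {
      int32_t expected = compare_value;
      __atomic_compare_exchange_n(dest, &expected, exchange_value,
                                  /* weak */ false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
      return expected;
    }

    int main() {
      volatile int32_t v = 41;
      int32_t old = cmpxchg32(&v, 41, 42);
      printf("%d %d\n", (int)old, (int)v); // 41 42
      return 0;
    }
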
+template<size_t byte_size>
+struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
+};
+
+template<size_t byte_size>
+struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
+};
+
+template<size_t byte_size>
+struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); }
+};
+
+#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
diff --git a/src/hotspot/share/gc/shared/cardGeneration.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp
similarity index 56%
rename from src/hotspot/share/gc/shared/cardGeneration.inline.hpp
rename to src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp
index bd8285af679de0b6640abee295b683562e2d079d..28868c7640640384799472f90cb0dea0647a9f12 100644
--- a/src/hotspot/share/gc/shared/cardGeneration.inline.hpp
+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -22,35 +23,23 @@
*
*/
-#ifndef SHARE_GC_SHARED_CARDGENERATION_INLINE_HPP
-#define SHARE_GC_SHARED_CARDGENERATION_INLINE_HPP
+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP
-#include "gc/shared/cardGeneration.hpp"
+#include <byteswap.h>
-#include "gc/shared/space.hpp"
-
-inline size_t CardGeneration::capacity() const {
- return space()->capacity();
-}
-
-inline size_t CardGeneration::used() const {
- return space()->used();
-}
-
-inline size_t CardGeneration::free() const {
- return space()->free();
-}
-
-inline MemRegion CardGeneration::used_region() const {
- return space()->used_region();
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+inline u2 Bytes::swap_u2(u2 x) {
+ return bswap_16(x);
}
-inline bool CardGeneration::is_in(const void* p) const {
- return space()->is_in(p);
+inline u4 Bytes::swap_u4(u4 x) {
+ return bswap_32(x);
}
-inline CompactibleSpace* CardGeneration::first_compaction_space() const {
- return space();
+inline u8 Bytes::swap_u8(u8 x) {
+ return bswap_64(x);
}
-#endif // SHARE_GC_SHARED_CARDGENERATION_INLINE_HPP
+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP
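
A minimal standalone check of the bswap_* primitives the new header wraps (assuming Linux's byteswap.h, which the swap_u2/u4/u8 bodies above rely on):

    #include <byteswap.h>
    #include <cstdint>
    #include <cstdio>

    // Java uses big-endian byte order where the class-file format requires it;
    // linux-riscv is little-endian, so Bytes::swap_u2/u4/u8 reduce to bswap_*.
    int main() {
      uint16_t a = 0x1122u;
      uint32_t b = 0x11223344u;
      uint64_t c = 0x1122334455667788ull;
      printf("%04x %08x %016llx\n",
             (unsigned)bswap_16(a), (unsigned)bswap_32(b), (unsigned long long)bswap_64(c));
      // prints: 2211 44332211 8877665544332211
      return 0;
    }
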
diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..147cfdf3c100d541d42f9ce554b6ba57157929b7
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP
+
+// Empty for build system
+
+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1aa58f27871d2e3a1d6dae17ca0a6173a20e3a89
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP
+
+#include <sys/syscall.h>
+
+//
+// Support for building on older Linux systems
+//
+
+#ifndef SYS_memfd_create
+#define SYS_memfd_create 279
+#endif
+#ifndef SYS_fallocate
+#define SYS_fallocate 47
+#endif
+
+#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..297414bfcd510b9354184fb6d6f4c6c0b571c05b
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, DontYieldALot, false);
+define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default
+define_pd_global(intx, VMThreadStackSize, 2048);
+
+define_pd_global(intx, CompilerThreadStackSize, 2048);
+
+define_pd_global(uintx, JVMInvokeMethodSlack, 8192);
+
+// Used on 64 bit platforms for UseCompressedOops base address
+define_pd_global(uintx, HeapBaseMinAddress, 2 * G);
+
+#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1c33dc1e87fadcde29dc54ad29c38cf005441e6b
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+
+// Included in orderAccess.hpp header file.
+
+#include "runtime/vm_version.hpp"
+
+// Implementation of class OrderAccess.
+
+inline void OrderAccess::loadload() { acquire(); }
+inline void OrderAccess::storestore() { release(); }
+inline void OrderAccess::loadstore() { acquire(); }
+inline void OrderAccess::storeload() { fence(); }
+
+#define FULL_MEM_BARRIER __sync_synchronize()
+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+
+inline void OrderAccess::acquire() {
+ READ_MEM_BARRIER;
+}
+
+inline void OrderAccess::release() {
+ WRITE_MEM_BARRIER;
+}
+
+inline void OrderAccess::fence() {
+ FULL_MEM_BARRIER;
+}
+
+inline void OrderAccess::cross_modify_fence_impl() {
+ asm volatile("fence.i" : : : "memory");
+ if (UseConservativeFence) {
+ asm volatile("fence ir, ir" : : : "memory");
+ }
+}
+
+#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f46bbab0a2e1241668e0892131c29e959cab679
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// no precompiled headers
+#include "asm/macroAssembler.hpp"
+#include "classfile/vmSymbols.hpp"
+#include "code/codeCache.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nativeInst.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "jvm.h"
+#include "memory/allocation.inline.hpp"
+#include "os_share_linux.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/java.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/osThread.hpp"
+#include "runtime/safepointMechanism.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "runtime/timer.hpp"
+#include "signals_posix.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/events.hpp"
+#include "utilities/vmError.hpp"
+
+// put OS-includes here
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+
+#define REG_LR 1
+#define REG_FP 8
+
+NOINLINE address os::current_stack_pointer() {
+ return (address)__builtin_frame_address(0);
+}
+
+char* os::non_memory_address_word() {
+ // Must never look like an address returned by reserve_memory,
+ return (char*) -1;
+}
+
+address os::Posix::ucontext_get_pc(const ucontext_t * uc) {
+ return (address)uc->uc_mcontext.__gregs[REG_PC];
+}
+
+void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) {
+ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc;
+}
+
+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) {
+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP];
+}
+
+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) {
+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP];
+}
+
+address os::fetch_frame_from_context(const void* ucVoid,
+ intptr_t** ret_sp, intptr_t** ret_fp) {
+ address epc;
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+
+ if (uc != NULL) {
+ epc = os::Posix::ucontext_get_pc(uc);
+ if (ret_sp != NULL) {
+ *ret_sp = os::Linux::ucontext_get_sp(uc);
+ }
+ if (ret_fp != NULL) {
+ *ret_fp = os::Linux::ucontext_get_fp(uc);
+ }
+ } else {
+ epc = NULL;
+ if (ret_sp != NULL) {
+ *ret_sp = (intptr_t *)NULL;
+ }
+ if (ret_fp != NULL) {
+ *ret_fp = (intptr_t *)NULL;
+ }
+ }
+
+ return epc;
+}
+
+frame os::fetch_compiled_frame_from_context(const void* ucVoid) {
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+ // In compiled code, the stack banging is performed before RA
+ // has been saved in the frame. RA is live, and SP and FP
+ // belong to the caller.
+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc);
+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc);
+ address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR]
+ - NativeInstruction::instruction_size);
+ return frame(frame_sp, frame_fp, frame_pc);
+}
+
+frame os::fetch_frame_from_context(const void* ucVoid) {
+ intptr_t* frame_sp = NULL;
+ intptr_t* frame_fp = NULL;
+ address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp);
+ return frame(frame_sp, frame_fp, epc);
+}
+
+// By default, gcc always saves frame pointer rfp on this stack. This
+// may get turned off by -fomit-frame-pointer.
+frame os::get_sender_for_C_frame(frame* fr) {
+ return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
+}
+
+NOINLINE frame os::current_frame() {
+ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0);
+ if (sender_sp != NULL) {
+ frame myframe((intptr_t*)os::current_stack_pointer(),
+ sender_sp[frame::link_offset],
+ CAST_FROM_FN_PTR(address, os::current_frame));
+ if (os::is_first_C_frame(&myframe)) {
+ // stack is not walkable
+ return frame();
+ } else {
+ return os::get_sender_for_C_frame(&myframe);
+ }
+ } else {
+ ShouldNotReachHere();
+ return frame();
+ }
+}
+
+// Utility functions
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
+
+ // decide if this trap can be handled by a stub
+ address stub = NULL;
+
+ address pc = NULL;
+
+ //%note os_trap_1
+ if (info != NULL && uc != NULL && thread != NULL) {
+ pc = (address) os::Posix::ucontext_get_pc(uc);
+
+ address addr = (address) info->si_addr;
+
+ // Make sure the high order byte is sign extended, as it may be masked away by the hardware.
+ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) {
+ addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56));
+ }
+
+ // Handle ALL stack overflow variations here
+ if (sig == SIGSEGV) {
+ // check if fault address is within thread stack
+ if (thread->is_in_full_stack(addr)) {
+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
+ return true; // continue
+ }
+ }
+ }
+
+ if (thread->thread_state() == _thread_in_Java) {
+ // Java thread running in Java code => find exception handler if any
+ // a fault inside compiled code, the interpreter, or a stub
+
+ // Handle signal from NativeJump::patch_verified_entry().
+ if ((sig == SIGILL || sig == SIGTRAP)
+ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
+ if (TraceTraps) {
+ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
+ }
+ stub = SharedRuntime::get_handle_wrong_method_stub();
+ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) {
+ stub = SharedRuntime::get_poll_stub(pc);
+ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
+ // BugId 4454115: A read from a MappedByteBuffer can fault
+ // here if the underlying file has been truncated.
+ // Do not crash the VM in such a case.
+ CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
+ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
+ address next_pc = pc + NativeCall::instruction_size;
+ if (is_unsafe_arraycopy) {
+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+ }
+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
+ }
+ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) {
+ // Pull a pointer to the error message out of the instruction
+ // stream.
+ const uint64_t *detail_msg_ptr
+ = (uint64_t*)(pc + NativeInstruction::instruction_size);
+ const char *detail_msg = (const char *)*detail_msg_ptr;
+ const char *msg = "stop";
+ if (TraceTraps) {
+ tty->print_cr("trap: %s: (SIGILL)", msg);
+ }
+
+ // End life with a fatal error, message and detail message and the context.
+ // Note: no need to do any post-processing here (e.g. signal chaining)
+ va_list va_dummy;
+ VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy);
+ va_end(va_dummy);
+
+ ShouldNotReachHere();
+ } else if (sig == SIGFPE &&
+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) {
+ stub =
+ SharedRuntime::
+ continuation_for_implicit_exception(thread,
+ pc,
+ SharedRuntime::
+ IMPLICIT_DIVIDE_BY_ZERO);
+ } else if (sig == SIGSEGV &&
+ MacroAssembler::uses_implicit_null_check((void*)addr)) {
+ // Determination of interpreter/vtable stub/compiled code null exception
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
+ }
+ } else if ((thread->thread_state() == _thread_in_vm ||
+ thread->thread_state() == _thread_in_native) &&
+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
+ thread->doing_unsafe_access()) {
+ address next_pc = pc + NativeCall::instruction_size;
+ if (UnsafeCopyMemory::contains_pc(pc)) {
+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+ }
+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
+ }
+
+ // jni_fast_GetField can trap at certain pc's if a GC kicks in
+ // and the heap gets shrunk before the field access.
+ if ((sig == SIGSEGV) || (sig == SIGBUS)) {
+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc);
+ if (addr_slow != (address)-1) {
+ stub = addr_slow;
+ }
+ }
+ }
+
+ if (stub != NULL) {
+ // save all thread context in case we need to restore it
+ if (thread != NULL) {
+ thread->set_saved_exception_pc(pc);
+ }
+
+ os::Posix::ucontext_set_pc(uc, stub);
+ return true;
+ }
+
+ return false; // Mute compiler
+}
+
+void os::Linux::init_thread_fpu_state(void) {
+}
+
+int os::Linux::get_fpu_control_word(void) {
+ return 0;
+}
+
+void os::Linux::set_fpu_control_word(int fpu_control) {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+
+// Minimum usable stack sizes required to get to user code. Space for
+// HotSpot guard pages is added later.
+size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K;
+size_t os::Posix::_java_thread_min_stack_allowed = 72 * K;
+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K;
+
+// return default stack size for thr_type
+size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
+ // default stack size (compiler thread needs larger stack)
+ size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M);
+ return s;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// helper functions for fatal error handler
+
+static const char* reg_abi_names[] = {
+ "pc",
+ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)",
+ "x5(t0)", "x6(t1)", "x7(t2)",
+ "x8(s0)", "x9(s1)",
+ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)",
+ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)",
+ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)"
+};
+
+void os::print_context(outputStream *st, const void *context) {
+ if (context == NULL) {
+ return;
+ }
+
+ const ucontext_t *uc = (const ucontext_t*)context;
+ st->print_cr("Registers:");
+ for (int r = 0; r < 32; r++) {
+ st->print("%-*.*s=", 8, 8, reg_abi_names[r]);
+ print_location(st, uc->uc_mcontext.__gregs[r]);
+ }
+ st->cr();
+
+ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
+ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp));
+ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t));
+ st->cr();
+
+ // Note: it may be unsafe to inspect memory near pc. For example, pc may
+ // point to garbage if entry point in an nmethod is corrupted. Leave
+ // this at the end, and hope for the best.
+ address pc = os::Posix::ucontext_get_pc(uc);
+ print_instructions(st, pc, sizeof(char));
+ st->cr();
+}
+
+void os::print_register_info(outputStream *st, const void *context) {
+ if (context == NULL) {
+ return;
+ }
+
+ const ucontext_t *uc = (const ucontext_t*)context;
+
+ st->print_cr("Register to memory mapping:");
+ st->cr();
+
+ // this is horrendously verbose but the layout of the registers in the
+ // context does not match how we defined our abstract Register set, so
+ // we can't just iterate through the gregs area
+
+ // this is only for the "general purpose" registers
+
+ for (int r = 0; r < 32; r++)
+ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]);
+ st->cr();
+}
+
+void os::setup_fpu() {
+}
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
+}
+#endif
+
+int os::extra_bang_size_in_bytes() {
+ return 0;
+}
+
+extern "C" {
+ int SpinPause() {
+ return 0;
+ }
+
+ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
+ if (from > to) {
+ const jshort *end = from + count;
+ while (from < end) {
+ *(to++) = *(from++);
+ }
+ } else if (from < to) {
+ const jshort *end = from;
+ from += count - 1;
+ to += count - 1;
+ while (from >= end) {
+ *(to--) = *(from--);
+ }
+ }
+ }
+ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
+ if (from > to) {
+ const jint *end = from + count;
+ while (from < end) {
+ *(to++) = *(from++);
+ }
+ } else if (from < to) {
+ const jint *end = from;
+ from += count - 1;
+ to += count - 1;
+ while (from >= end) {
+ *(to--) = *(from--);
+ }
+ }
+ }
+ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
+ if (from > to) {
+ const jlong *end = from + count;
+ while (from < end) {
+ os::atomic_copy64(from++, to++);
+ }
+ } else if (from < to) {
+ const jlong *end = from;
+ from += count - 1;
+ to += count - 1;
+ while (from >= end) {
+ os::atomic_copy64(from--, to--);
+ }
+ }
+ }
+
+ void _Copy_arrayof_conjoint_bytes(const HeapWord* from,
+ HeapWord* to,
+ size_t count) {
+ memmove(to, from, count);
+ }
+ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from,
+ HeapWord* to,
+ size_t count) {
+ memmove(to, from, count * 2);
+ }
+ void _Copy_arrayof_conjoint_jints(const HeapWord* from,
+ HeapWord* to,
+ size_t count) {
+ memmove(to, from, count * 4);
+ }
+ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from,
+ HeapWord* to,
+ size_t count) {
+ memmove(to, from, count * 8);
+ }
+};
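
The copy helpers above pick a loop direction from the relative order of "from" and "to" so that overlapping ranges are copied without clobbering elements not yet read. A standalone sketch of that idea (the harness below is hypothetical and only mirrors the direction logic, not the HotSpot types):

#include <assert.h>
#include <stddef.h>

// Minimal stand-in for the conjoint copy pattern used above: ascending copy
// when the source lies above the destination, descending copy otherwise.
static void copy_conjoint_ints(const int* from, int* to, size_t count) {
  if (from > to) {
    for (size_t i = 0; i < count; i++) to[i] = from[i];           // ascending
  } else if (from < to) {
    for (size_t i = count; i > 0; i--) to[i - 1] = from[i - 1];   // descending
  }
}

int main() {
  int a[5] = {1, 2, 3, 4, 5};
  copy_conjoint_ints(a, a + 1, 4);  // overlapping shift right by one element
  assert(a[1] == 1 && a[2] == 2 && a[4] == 4);
  return 0;
}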
diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6d4156306617d891df5e643495d03812c9eaaf96
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
+
+ static void setup_fpu();
+
+ // Used to register dynamic code cache area with the OS
+ // Note: Currently only used in 64 bit Windows implementations
+ static bool register_code_area(char *low, char *high) { return true; }
+
+ // Atomically copy 64 bits of data
+ static void atomic_copy64(const volatile void *src, volatile void *dst) {
+ *(jlong *) dst = *(const jlong *) src;
+ }
+
+ // SYSCALL_RISCV_FLUSH_ICACHE is used to flush the instruction cache. The "fence.i" instruction
+ // only works on the current hart, so the kernel provides this icache-flush syscall to flush the
+ // icache on every hart. A flag selects between a global and a local icache flush.
+ static void icache_flush(long int start, long int end)
+ {
+ const int SYSCALL_RISCV_FLUSH_ICACHE = 259;
+ register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE;
+ register long int __a0 asm ("a0") = start;
+ register long int __a1 asm ("a1") = end;
+ // The flush can be applied either to all harts or only to the current one.
+ // 0 requests a global icache flush, so the flush also reaches harts that
+ // are concurrently executing this code.
+ register long int __a2 asm ("a2") = 0;
+ __asm__ volatile ("ecall\n\t"
+ : "+r" (__a0)
+ : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7)
+ : "memory");
+ }
+
+#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
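
For illustration, the same flush can be issued without the wrapper by calling syscall(2) directly; the constant mirrors the header above, and "code_begin"/"code_end" are hypothetical:

#include <sys/syscall.h>
#include <unistd.h>

// Sketch only (not part of the patch): flush a freshly written code range so
// that all harts observe the new instructions.
static void flush_icache_range(void* code_begin, void* code_end) {
  const long SYSCALL_RISCV_FLUSH_ICACHE = 259;
  // A zero flags argument requests a global flush, matching the wrapper above.
  syscall(SYSCALL_RISCV_FLUSH_ICACHE, code_begin, code_end, 0L);
}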
diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a6432c84ec71ff0ed92d0054c569fb6d71791aee
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
+#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+
+inline void Prefetch::read (const void *loc, intx interval) {
+}
+
+inline void Prefetch::write(void *loc, intx interval) {
+}
+
+#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3100572e9fdec5ef9eb6567f6956ba4a83f1e0f6
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/thread.inline.hpp"
+
+frame JavaThread::pd_last_frame() {
+ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
+}
+
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is
+// currently interrupted by SIGPROF
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr,
+ void* ucontext, bool isInJava) {
+
+ assert(Thread::current() == this, "caller must be current thread");
+ return pd_get_top_frame(fr_addr, ucontext, isInJava);
+}
+
+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) {
+ return pd_get_top_frame(fr_addr, ucontext, isInJava);
+}
+
+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) {
+ // If we have a last_Java_frame, then we should use it even if
+ // isInJava == true. It should be more reliable than ucontext info.
+ if (has_last_Java_frame() && frame_anchor()->walkable()) {
+ *fr_addr = pd_last_frame();
+ return true;
+ }
+
+ // At this point, we don't have a last_Java_frame, so
+ // we try to glean some information out of the ucontext
+ // if we were running Java code when SIGPROF came in.
+ if (isInJava) {
+ ucontext_t* uc = (ucontext_t*) ucontext;
+
+ intptr_t* ret_fp = NULL;
+ intptr_t* ret_sp = NULL;
+ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp);
+ if (addr == NULL || ret_sp == NULL ) {
+ // ucontext wasn't useful
+ return false;
+ }
+
+ frame ret_frame(ret_sp, ret_fp, addr);
+ if (!ret_frame.safe_for_sender(this)) {
+#ifdef COMPILER2
+ frame ret_frame2(ret_sp, NULL, addr);
+ if (!ret_frame2.safe_for_sender(this)) {
+ // nothing else to try if the frame isn't good
+ return false;
+ }
+ ret_frame = ret_frame2;
+#else
+ // nothing else to try if the frame isn't good
+ return false;
+#endif /* COMPILER2 */
+ }
+ *fr_addr = ret_frame;
+ return true;
+ }
+
+ // nothing else to try
+ return false;
+}
+
+void JavaThread::cache_global_variables() { }
diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..61e2cf85b63001b7075bae8622b6408b2d5f8a83
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
+
+ private:
+ void pd_initialize() {
+ _anchor.clear();
+ }
+
+ frame pd_last_frame();
+
+ public:
+ static ByteSize last_Java_fp_offset() {
+ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
+ }
+
+ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
+ bool isInJava);
+
+ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava);
+private:
+ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava);
+
+#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6cf7683a58602d4831d89b126c250338f4a82e2e
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+ /******************************/ \
+ /* Threads (NOTE: incomplete) */ \
+ /******************************/ \
+ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
+ nonstatic_field(OSThread, _pthread_id, pthread_t)
+
+
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+ \
+ /**********************/ \
+ /* Posix Thread IDs */ \
+ /**********************/ \
+ \
+ declare_integer_type(OSThread::thread_id_t) \
+ declare_unsigned_integer_type(pthread_t)
+
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a954dcf5768ce4cf8f70d51fdc5a2c56b3460754
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "runtime/os.hpp"
+#include "runtime/os.inline.hpp"
+#include "runtime/vm_version.hpp"
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#ifndef HWCAP_ISA_I
+#define HWCAP_ISA_I (1 << ('I' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_M
+#define HWCAP_ISA_M (1 << ('M' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_A
+#define HWCAP_ISA_A (1 << ('A' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_F
+#define HWCAP_ISA_F (1 << ('F' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_D
+#define HWCAP_ISA_D (1 << ('D' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_C
+#define HWCAP_ISA_C (1 << ('C' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_V
+#define HWCAP_ISA_V (1 << ('V' - 'A'))
+#endif
+
+#define read_csr(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__ ("csrr %0, %1" \
+ : "=r" (__v) \
+ : "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+uint32_t VM_Version::get_current_vector_length() {
+ assert(_features & CPU_V, "should not call this");
+ return (uint32_t)read_csr(CSR_VLENB);
+}
+
+void VM_Version::get_os_cpu_info() {
+
+ uint64_t auxv = getauxval(AT_HWCAP);
+
+ static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP");
+ static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP");
+ static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP");
+ static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP");
+ static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP");
+ static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP");
+ static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP");
+
+ // RISC-V has four bit-manipulation ISA extensions: Zba/Zbb/Zbc/Zbs.
+ // Their availability cannot be queried from HWCAP.
+ // TODO: Add proper detection for those extensions.
+ _features = auxv & (
+ HWCAP_ISA_I |
+ HWCAP_ISA_M |
+ HWCAP_ISA_A |
+ HWCAP_ISA_F |
+ HWCAP_ISA_D |
+ HWCAP_ISA_C |
+ HWCAP_ISA_V);
+
+ if (FILE *f = fopen("/proc/cpuinfo", "r")) {
+ char buf[512], *p;
+ while (fgets(buf, sizeof (buf), f) != NULL) {
+ if ((p = strchr(buf, ':')) != NULL) {
+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) {
+ char* uarch = os::strdup(p + 2);
+ uarch[strcspn(uarch, "\n")] = '\0';
+ _uarch = uarch;
+ break;
+ }
+ }
+ }
+ fclose(f);
+ }
+}
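
The HWCAP query above can be reproduced outside the VM; the sketch below only assumes Linux's getauxval and the single-letter bit layout used by the HWCAP_ISA_* macros in this file:

#include <sys/auxv.h>
#include <stdio.h>
#include <initializer_list>

// Sketch only: print which single-letter RISC-V ISA extensions the kernel
// advertises through AT_HWCAP.
int main() {
  unsigned long hwcap = getauxval(AT_HWCAP);
  for (char ext : {'I', 'M', 'A', 'F', 'D', 'C', 'V'}) {
    // Each base extension maps to bit (letter - 'A').
    printf("%c: %s\n", ext, (hwcap & (1UL << (ext - 'A'))) ? "yes" : "no");
  }
  return 0;
}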
diff --git a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
index 53b017d221725486a1b41944092e99d30565aa49..26ec71d258a1f4c87b973329fbf289482dd8e7ec 100644
--- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
+++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2019 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -304,7 +304,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
if (nm != NULL && nm->has_unsafe_access()) {
- // We don't really need a stub here! Just set the pending exeption and
+ // We don't really need a stub here! Just set the pending exception and
// continue at the next instruction after the faulting read. Returning
// garbage from this read is ok.
thread->set_pending_unsafe_access_error();
@@ -329,7 +329,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
} else if ((thread->thread_state() == _thread_in_vm ||
thread->thread_state() == _thread_in_native) &&
sig == SIGBUS && thread->doing_unsafe_access()) {
- // We don't really need a stub here! Just set the pending exeption and
+ // We don't really need a stub here! Just set the pending exception and
// continue at the next instruction after the faulting read. Returning
// garbage from this read is ok.
thread->set_pending_unsafe_access_error();
diff --git a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
index 5e346efee54e0279d03d4d2a7173600192208bf7..c6b945fdd7903e69ed661f5ca1778a511bbc253f 100644
--- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
+++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
@@ -459,11 +459,26 @@ bool os::supports_sse() {
}
juint os::cpu_microcode_revision() {
+ // Note: this code runs on startup, and therefore should not be slow,
+ // see JDK-8283200.
+
juint result = 0;
- char data[2048] = {0}; // lines should fit in 2K buf
- size_t len = sizeof(data);
- FILE *fp = os::fopen("/proc/cpuinfo", "r");
+
+ // Attempt 1 (faster): Read the microcode version off the sysfs.
+ FILE *fp = os::fopen("/sys/devices/system/cpu/cpu0/microcode/version", "r");
+ if (fp) {
+ int read = fscanf(fp, "%x", &result);
+ fclose(fp);
+ if (read > 0) {
+ return result;
+ }
+ }
+
+ // Attempt 2 (slower): Read the microcode version off the procfs.
+ fp = os::fopen("/proc/cpuinfo", "r");
if (fp) {
+ char data[2048] = {0}; // lines should fit in 2K buf
+ size_t len = sizeof(data);
while (!feof(fp)) {
if (fgets(data, len, fp)) {
if (strstr(data, "microcode") != NULL) {
@@ -475,6 +490,7 @@ juint os::cpu_microcode_revision() {
}
fclose(fp);
}
+
return result;
}
diff --git a/src/hotspot/os_cpu/windows_x86/assembler_windows_x86.cpp b/src/hotspot/os_cpu/windows_x86/assembler_windows_x86.cpp
index 8045f792b7669efb43117b5cfbe81b5f110c23a9..3cc0003cd4c173c887df6394fad4423e54f8d06f 100644
--- a/src/hotspot/os_cpu/windows_x86/assembler_windows_x86.cpp
+++ b/src/hotspot/os_cpu/windows_x86/assembler_windows_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@ void MacroAssembler::int3() {
// Warning: This mechanism assumes that we only attempt to get the
// thread when we are nested below a call wrapper.
//
-// movl reg, fs:[0] Get exeception pointer
+// movl reg, fs:[0] Get exception pointer
// movl reg, [reg + thread_ptr_offset] Load thread
//
void MacroAssembler::get_thread(Register thread) {
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index 274e623ea61872c9072d240a718f5a445064b3ed..ba65dd706fb251bf3a7ca03f9f5e72ad408f58d3 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -612,7 +612,7 @@ bool InstructForm::needs_anti_dependence_check(FormDict &globals) const {
strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrIndexOf" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrIndexOfChar" )==0 ||
- strcmp(_matrule->_rChild->_opType,"HasNegatives" )==0 ||
+ strcmp(_matrule->_rChild->_opType,"CountPositives" )==0 ||
strcmp(_matrule->_rChild->_opType,"AryEq" )==0 ))
return true;
@@ -902,7 +902,7 @@ uint InstructForm::oper_input_base(FormDict &globals) {
strcmp(_matrule->_rChild->_opType,"StrCompressedCopy" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 ||
strcmp(_matrule->_rChild->_opType,"StrIndexOfChar")==0 ||
- strcmp(_matrule->_rChild->_opType,"HasNegatives")==0 ||
+ strcmp(_matrule->_rChild->_opType,"CountPositives")==0 ||
strcmp(_matrule->_rChild->_opType,"EncodeISOArray")==0)) {
// String.(compareTo/equals/indexOf) and Arrays.equals
// and sun.nio.cs.iso8859_1$Encoder.EncodeISOArray
@@ -4212,7 +4212,7 @@ bool MatchRule::is_vector() const {
"CMoveVD", "CMoveVF",
"DivVF","DivVD",
"AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
- "NegVF","NegVD","NegVI",
+ "NegVF","NegVD","NegVI","NegVL",
"SqrtVD","SqrtVF",
"AndV" ,"XorV" ,"OrV",
"MaxV", "MinV",
@@ -4239,6 +4239,7 @@ bool MatchRule::is_vector() const {
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
// Next are vector mask ops.
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
+ "RoundVF", "RoundVD",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD"
diff --git a/src/hotspot/share/asm/register.hpp b/src/hotspot/share/asm/register.hpp
index 4207267ebab765e8a9deb354973a3280c55492f3..b8538e4df6810330e02f798b8baa4404f4d80c87 100644
--- a/src/hotspot/share/asm/register.hpp
+++ b/src/hotspot/share/asm/register.hpp
@@ -28,6 +28,7 @@
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
+#include "utilities/population_count.hpp"
// Use AbstractRegister as shortcut
class AbstractRegisterImpl;
@@ -86,6 +87,149 @@ const type name = ((type)value)
#define INTERNAL_VISIBILITY
#endif
+template <class RegImpl> class RegSetIterator;
+template <class RegImpl> class ReverseRegSetIterator;
+
+// A set of registers
+template <class RegImpl>
+class AbstractRegSet {
+ uint32_t _bitset;
+
+ AbstractRegSet(uint32_t bitset) : _bitset(bitset) { }
+
+public:
+
+ AbstractRegSet() : _bitset(0) { }
+
+ AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { }
+
+ AbstractRegSet operator+(const AbstractRegSet aSet) const {
+ AbstractRegSet result(_bitset | aSet._bitset);
+ return result;
+ }
+
+ AbstractRegSet operator-(const AbstractRegSet aSet) const {
+ AbstractRegSet result(_bitset & ~aSet._bitset);
+ return result;
+ }
+
+ AbstractRegSet &operator+=(const AbstractRegSet aSet) {
+ *this = *this + aSet;
+ return *this;
+ }
+
+ AbstractRegSet &operator-=(const AbstractRegSet aSet) {
+ *this = *this - aSet;
+ return *this;
+ }
+
+ static AbstractRegSet of(RegImpl r1) {
+ return AbstractRegSet(r1);
+ }
+
+ static AbstractRegSet of(RegImpl r1, RegImpl r2) {
+ return of(r1) + r2;
+ }
+
+ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) {
+ return of(r1, r2) + r3;
+ }
+
+ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) {
+ return of(r1, r2, r3) + r4;
+ }
+
+ static AbstractRegSet range(RegImpl start, RegImpl end) {
+ assert(start <= end, "must be");
+ uint32_t bits = ~0;
+ bits <<= start->encoding();
+ bits <<= 31 - end->encoding();
+ bits >>= 31 - end->encoding();
+
+ return AbstractRegSet(bits);
+ }
+
+ uint size() const { return population_count(_bitset); }
+
+ uint32_t bits() const { return _bitset; }
+
+private:
+
+ RegImpl first();
+ RegImpl last();
+
+public:
+
+ friend class RegSetIterator<RegImpl>;
+ friend class ReverseRegSetIterator<RegImpl>;
+
+ RegSetIterator<RegImpl> begin();
+ ReverseRegSetIterator<RegImpl> rbegin();
+};
+
+template <class RegImpl>
+class RegSetIterator {
+ AbstractRegSet<RegImpl> _regs;
+
+public:
+ RegSetIterator(AbstractRegSet<RegImpl> x): _regs(x) {}
+ RegSetIterator(const RegSetIterator& mit) : _regs(mit._regs) {}
+
+ RegSetIterator& operator++() {
+ RegImpl r = _regs.first();
+ if (r->is_valid())
+ _regs -= r;
+ return *this;
+ }
+
+ bool operator==(const RegSetIterator& rhs) const {
+ return _regs.bits() == rhs._regs.bits();
+ }
+ bool operator!=(const RegSetIterator& rhs) const {
+ return ! (rhs == *this);
+ }
+
+ RegImpl operator*() {
+ return _regs.first();
+ }
+};
+
+template <class RegImpl>
+inline RegSetIterator<RegImpl> AbstractRegSet<RegImpl>::begin() {
+ return RegSetIterator<RegImpl>(*this);
+}
+
+template <class RegImpl>
+class ReverseRegSetIterator {
+ AbstractRegSet<RegImpl> _regs;
+
+public:
+ ReverseRegSetIterator(AbstractRegSet<RegImpl> x): _regs(x) {}
+ ReverseRegSetIterator(const ReverseRegSetIterator& mit) : _regs(mit._regs) {}
+
+ ReverseRegSetIterator& operator++() {
+ RegImpl r = _regs.last();
+ if (r->is_valid())
+ _regs -= r;
+ return *this;
+ }
+
+ bool operator==(const ReverseRegSetIterator& rhs) const {
+ return _regs.bits() == rhs._regs.bits();
+ }
+ bool operator!=(const ReverseRegSetIterator& rhs) const {
+ return ! (rhs == *this);
+ }
+
+ RegImpl operator*() {
+ return _regs.last();
+ }
+};
+
+template <class RegImpl>
+inline ReverseRegSetIterator<RegImpl> AbstractRegSet<RegImpl>::rbegin() {
+ return ReverseRegSetIterator<RegImpl>(*this);
+}
#include CPU_HEADER(register)
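
The register-set helper added above is easiest to see in use. The sketch below assumes a platform Register type whose encoding() and is_valid() back the bitset, register names borrowed from the RISC-V port (x10, x11, x18, x21), and a RegSet typedef as the CPU headers typically define it:

// Sketch only: building and walking an AbstractRegSet.
typedef AbstractRegSet<Register> RegSet;

void visit_saved_regs() {
  RegSet saved = RegSet::of(x10, x11) + RegSet::range(x18, x21);
  assert(saved.size() == 6, "two argument regs plus s2..s5");  // size() is a popcount
  // Forward iteration visits registers from the lowest encoding upwards;
  // first() yields an invalid register once the set is exhausted.
  for (RegSetIterator<Register> it = saved.begin(); (*it)->is_valid(); ++it) {
    // per-register work would go here, e.g. assigning a spill slot to *it
  }
}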
diff --git a/src/hotspot/share/c1/c1_CFGPrinter.cpp b/src/hotspot/share/c1/c1_CFGPrinter.cpp
index 73a1d2d649e8686330a3a4f7bab3b3d48c867f14..f3b24e9ecb6b34d58e22703458b11227793410e4 100644
--- a/src/hotspot/share/c1/c1_CFGPrinter.cpp
+++ b/src/hotspot/share/c1/c1_CFGPrinter.cpp
@@ -244,13 +244,11 @@ void CFGPrinterOutput::print_block(BlockBegin* block) {
output()->cr();
output()->indent();
+ output()->print("successors ");
if (block->end() != NULL) {
- output()->print("successors ");
for (i = 0; i < block->number_of_sux(); i++) {
output()->print("\"B%d\" ", block->sux_at(i)->block_id());
}
- } else {
- output()->print("(block has no end, cannot print successors)");
}
output()->cr();
diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp
index 63d44d7a7b69922a0805043a9e7f478ed4ff95dd..97bb9d60377d08d651fffe4388b57b9a36a2c35b 100644
--- a/src/hotspot/share/c1/c1_CodeStubs.hpp
+++ b/src/hotspot/share/c1/c1_CodeStubs.hpp
@@ -513,9 +513,6 @@ class SimpleExceptionStub: public CodeStub {
class ArrayStoreExceptionStub: public SimpleExceptionStub {
- private:
- CodeEmitInfo* _info;
-
public:
ArrayStoreExceptionStub(LIR_Opr obj, CodeEmitInfo* info): SimpleExceptionStub(Runtime1::throw_array_store_exception_id, obj, info) {}
#ifndef PRODUCT
diff --git a/src/hotspot/share/c1/c1_Compilation.cpp b/src/hotspot/share/c1/c1_Compilation.cpp
index ac415edb1f5dc3ed96b14c0ed8fdcdf364b631b4..baabbbd147bb82b696435f03fd50bcb114c03833 100644
--- a/src/hotspot/share/c1/c1_Compilation.cpp
+++ b/src/hotspot/share/c1/c1_Compilation.cpp
@@ -77,7 +77,6 @@ static int totalInstructionNodes = 0;
class PhaseTraceTime: public TraceTime {
private:
- JavaThread* _thread;
CompileLog* _log;
TimerName _timer;
diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp
index d27b9175c9da514cfceffa758b7ded5d894188c9..33f0a59c7585def5593aa115f832135af5e4a243 100644
--- a/src/hotspot/share/c1/c1_LIR.cpp
+++ b/src/hotspot/share/c1/c1_LIR.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -186,7 +186,6 @@ bool LIR_Opr::is_oop() const {
void LIR_Op2::verify() const {
#ifdef ASSERT
switch (code()) {
- case lir_cmove:
case lir_xchg:
break;
@@ -237,8 +236,7 @@ void LIR_Op2::verify() const {
LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block)
- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
- , _cond(cond)
+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
, _label(block->label())
, _block(block)
, _ublock(NULL)
@@ -246,8 +244,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block)
}
LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, CodeStub* stub) :
- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
- , _cond(cond)
+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
, _label(stub->entry())
, _block(NULL)
, _ublock(NULL)
@@ -255,8 +252,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, CodeStub* stub) :
}
LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock)
- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
- , _cond(cond)
+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
, _label(block->label())
, _block(block)
, _ublock(ublock)
@@ -278,13 +274,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) {
}
void LIR_OpBranch::negate_cond() {
- switch (_cond) {
- case lir_cond_equal: _cond = lir_cond_notEqual; break;
- case lir_cond_notEqual: _cond = lir_cond_equal; break;
- case lir_cond_less: _cond = lir_cond_greaterEqual; break;
- case lir_cond_lessEqual: _cond = lir_cond_greater; break;
- case lir_cond_greaterEqual: _cond = lir_cond_less; break;
- case lir_cond_greater: _cond = lir_cond_lessEqual; break;
+ switch (cond()) {
+ case lir_cond_equal: set_cond(lir_cond_notEqual); break;
+ case lir_cond_notEqual: set_cond(lir_cond_equal); break;
+ case lir_cond_less: set_cond(lir_cond_greaterEqual); break;
+ case lir_cond_lessEqual: set_cond(lir_cond_greater); break;
+ case lir_cond_greaterEqual: set_cond(lir_cond_less); break;
+ case lir_cond_greater: set_cond(lir_cond_lessEqual); break;
default: ShouldNotReachHere();
}
}
@@ -507,6 +503,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
assert(op->as_OpBranch() != NULL, "must be");
LIR_OpBranch* opBranch = (LIR_OpBranch*)op;
+ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() &&
+ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() &&
+ opBranch->_tmp5->is_illegal(), "not used");
+
+ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1);
+ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2);
+
if (opBranch->_info != NULL) do_info(opBranch->_info);
assert(opBranch->_result->is_illegal(), "not used");
if (opBranch->_stub != NULL) opBranch->stub()->visit(this);
@@ -595,17 +598,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
// to the result operand, otherwise the backend fails
case lir_cmove:
{
- assert(op->as_Op2() != NULL, "must be");
- LIR_Op2* op2 = (LIR_Op2*)op;
+ assert(op->as_Op4() != NULL, "must be");
+ LIR_Op4* op4 = (LIR_Op4*)op;
- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() &&
- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used");
+ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() &&
+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used");
+ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used");
- do_input(op2->_opr1);
- do_input(op2->_opr2);
- do_temp(op2->_opr2);
- do_output(op2->_result);
+ do_input(op4->_opr1);
+ do_input(op4->_opr2);
+ if (op4->_opr3->is_valid()) do_input(op4->_opr3);
+ if (op4->_opr4->is_valid()) do_input(op4->_opr4);
+ do_temp(op4->_opr2);
+ do_output(op4->_result);
break;
}
@@ -1049,6 +1054,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) {
masm->emit_op3(this);
}
+void LIR_Op4::emit_code(LIR_Assembler* masm) {
+ masm->emit_op4(this);
+}
+
void LIR_OpLock::emit_code(LIR_Assembler* masm) {
masm->emit_lock(this);
if (stub()) {
@@ -1089,6 +1098,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block)
, _file(NULL)
, _line(0)
#endif
+#ifdef RISCV
+ , _cmp_opr1(LIR_OprFact::illegalOpr)
+ , _cmp_opr2(LIR_OprFact::illegalOpr)
+#endif
{ }
@@ -1106,6 +1119,38 @@ void LIR_List::set_file_and_line(const char * file, int line) {
}
#endif
+#ifdef RISCV
+void LIR_List::set_cmp_oprs(LIR_Op* op) {
+ switch (op->code()) {
+ case lir_cmp:
+ _cmp_opr1 = op->as_Op2()->in_opr1();
+ _cmp_opr2 = op->as_Op2()->in_opr2();
+ break;
+ case lir_branch: // fall through
+ case lir_cond_float_branch:
+ assert(op->as_OpBranch()->cond() == lir_cond_always ||
+ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr),
+ "conditional branches must have legal operands");
+ if (op->as_OpBranch()->cond() != lir_cond_always) {
+ op->as_Op2()->set_in_opr1(_cmp_opr1);
+ op->as_Op2()->set_in_opr2(_cmp_opr2);
+ }
+ break;
+ case lir_cmove:
+ op->as_Op4()->set_in_opr3(_cmp_opr1);
+ op->as_Op4()->set_in_opr4(_cmp_opr2);
+ break;
+#if INCLUDE_ZGC
+ case lir_zloadbarrier_test:
+ _cmp_opr1 = FrameMap::as_opr(t1);
+ _cmp_opr2 = LIR_OprFact::intConst(0);
+ break;
+#endif
+ default:
+ break;
+ }
+}
+#endif
void LIR_List::append(LIR_InsertionBuffer* buffer) {
assert(this == buffer->lir_list(), "wrong lir list");
@@ -1677,7 +1722,6 @@ const char * LIR_Op::name() const {
case lir_cmp_l2i: s = "cmp_l2i"; break;
case lir_ucmp_fd2i: s = "ucomp_fd2i"; break;
case lir_cmp_fd2i: s = "comp_fd2i"; break;
- case lir_cmove: s = "cmove"; break;
case lir_add: s = "add"; break;
case lir_sub: s = "sub"; break;
case lir_mul: s = "mul"; break;
@@ -1700,6 +1744,8 @@ const char * LIR_Op::name() const {
case lir_irem: s = "irem"; break;
case lir_fmad: s = "fmad"; break;
case lir_fmaf: s = "fmaf"; break;
+ // LIR_Op4
+ case lir_cmove: s = "cmove"; break;
// LIR_OpJavaCall
case lir_static_call: s = "static"; break;
case lir_optvirtual_call: s = "optvirtual"; break;
@@ -1833,6 +1879,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) {
// LIR_OpBranch
void LIR_OpBranch::print_instr(outputStream* out) const {
print_condition(out, cond()); out->print(" ");
+ in_opr1()->print(out); out->print(" ");
+ in_opr2()->print(out); out->print(" ");
if (block() != NULL) {
out->print("[B%d] ", block()->block_id());
} else if (stub() != NULL) {
@@ -1913,7 +1961,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const {
// LIR_Op2
void LIR_Op2::print_instr(outputStream* out) const {
- if (code() == lir_cmove || code() == lir_cmp) {
+ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) {
print_condition(out, condition()); out->print(" ");
}
in_opr1()->print(out); out->print(" ");
@@ -1964,6 +2012,15 @@ void LIR_Op3::print_instr(outputStream* out) const {
result_opr()->print(out);
}
+// LIR_Op4
+void LIR_Op4::print_instr(outputStream* out) const {
+ print_condition(out, condition()); out->print(" ");
+ in_opr1()->print(out); out->print(" ");
+ in_opr2()->print(out); out->print(" ");
+ in_opr3()->print(out); out->print(" ");
+ in_opr4()->print(out); out->print(" ");
+ result_opr()->print(out);
+}
void LIR_OpLock::print_instr(outputStream* out) const {
hdr_opr()->print(out); out->print(" ");
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
index c4504b7b8b5cc39d18f37279eff13859faeabb87..c3afd5553a1dc7c59025318ca919aef47a32433c 100644
--- a/src/hotspot/share/c1/c1_LIR.hpp
+++ b/src/hotspot/share/c1/c1_LIR.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -887,6 +887,7 @@ class LIR_Op2;
class LIR_OpDelay;
class LIR_Op3;
class LIR_OpAllocArray;
+class LIR_Op4;
class LIR_OpCall;
class LIR_OpJavaCall;
class LIR_OpRTCall;
@@ -931,8 +932,6 @@ enum LIR_Code {
, lir_null_check
, lir_return
, lir_leal
- , lir_branch
- , lir_cond_float_branch
, lir_move
, lir_convert
, lir_alloc_object
@@ -943,11 +942,12 @@ enum LIR_Code {
, lir_load_klass
, end_op1
, begin_op2
+ , lir_branch
+ , lir_cond_float_branch
, lir_cmp
, lir_cmp_l2i
, lir_ucmp_fd2i
, lir_cmp_fd2i
- , lir_cmove
, lir_add
, lir_sub
, lir_mul
@@ -975,6 +975,9 @@ enum LIR_Code {
, lir_fmad
, lir_fmaf
, end_op3
+ , begin_op4
+ , lir_cmove
+ , end_op4
, begin_opJavaCall
, lir_static_call
, lir_optvirtual_call
@@ -1011,6 +1014,11 @@ enum LIR_Code {
, begin_opAssert
, lir_assert
, end_opAssert
+#ifdef INCLUDE_ZGC
+ , begin_opZLoadBarrierTest
+ , lir_zloadbarrier_test
+ , end_opZLoadBarrierTest
+#endif
};
@@ -1146,6 +1154,7 @@ class LIR_Op: public CompilationResourceObj {
virtual LIR_Op1* as_Op1() { return NULL; }
virtual LIR_Op2* as_Op2() { return NULL; }
virtual LIR_Op3* as_Op3() { return NULL; }
+ virtual LIR_Op4* as_Op4() { return NULL; }
virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; }
virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; }
virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; }
@@ -1419,45 +1428,6 @@ class LIR_OpRTCall: public LIR_OpCall {
};
-class LIR_OpBranch: public LIR_Op {
- friend class LIR_OpVisitState;
-
- private:
- LIR_Condition _cond;
- Label* _label;
- BlockBegin* _block; // if this is a branch to a block, this is the block
- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block
- CodeStub* _stub; // if this is a branch to a stub, this is the stub
-
- public:
- LIR_OpBranch(LIR_Condition cond, Label* lbl)
- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
- , _cond(cond)
- , _label(lbl)
- , _block(NULL)
- , _ublock(NULL)
- , _stub(NULL) { }
-
- LIR_OpBranch(LIR_Condition cond, BlockBegin* block);
- LIR_OpBranch(LIR_Condition cond, CodeStub* stub);
-
- // for unordered comparisons
- LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock);
-
- LIR_Condition cond() const { return _cond; }
- Label* label() const { return _label; }
- BlockBegin* block() const { return _block; }
- BlockBegin* ublock() const { return _ublock; }
- CodeStub* stub() const { return _stub; }
-
- void change_block(BlockBegin* b);
- void change_ublock(BlockBegin* b);
- void negate_cond();
-
- virtual void emit_code(LIR_Assembler* masm);
- virtual LIR_OpBranch* as_OpBranch() { return this; }
- virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
-};
class LIR_OpReturn: public LIR_Op1 {
friend class LIR_OpVisitState;
@@ -1631,19 +1601,19 @@ class LIR_Op2: public LIR_Op {
void verify() const;
public:
- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL)
+ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL)
: LIR_Op(code, LIR_OprFact::illegalOpr, info)
, _fpu_stack_size(0)
, _opr1(opr1)
, _opr2(opr2)
- , _type(T_ILLEGAL)
+ , _type(type)
, _tmp1(LIR_OprFact::illegalOpr)
, _tmp2(LIR_OprFact::illegalOpr)
, _tmp3(LIR_OprFact::illegalOpr)
, _tmp4(LIR_OprFact::illegalOpr)
, _tmp5(LIR_OprFact::illegalOpr)
, _condition(condition) {
- assert(code == lir_cmp || code == lir_assert, "code check");
+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check");
}
LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type)
@@ -1675,7 +1645,7 @@ class LIR_Op2: public LIR_Op {
, _tmp4(LIR_OprFact::illegalOpr)
, _tmp5(LIR_OprFact::illegalOpr)
, _condition(lir_cond_unknown) {
- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check");
}
LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr,
@@ -1691,7 +1661,7 @@ class LIR_Op2: public LIR_Op {
, _tmp4(tmp4)
, _tmp5(tmp5)
, _condition(lir_cond_unknown) {
- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check");
}
LIR_Opr in_opr1() const { return _opr1; }
@@ -1703,10 +1673,10 @@ class LIR_Op2: public LIR_Op {
LIR_Opr tmp4_opr() const { return _tmp4; }
LIR_Opr tmp5_opr() const { return _tmp5; }
LIR_Condition condition() const {
- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition;
+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition;
}
void set_condition(LIR_Condition condition) {
- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition;
+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition;
}
void set_fpu_stack_size(int size) { _fpu_stack_size = size; }
@@ -1720,6 +1690,51 @@ class LIR_Op2: public LIR_Op {
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
};
+class LIR_OpBranch: public LIR_Op2 {
+ friend class LIR_OpVisitState;
+
+ private:
+ Label* _label;
+ BlockBegin* _block; // if this is a branch to a block, this is the block
+ BlockBegin* _ublock; // if this is a float-branch, this is the unordered block
+ CodeStub* _stub; // if this is a branch to a stub, this is the stub
+
+ public:
+ LIR_OpBranch(LIR_Condition cond, Label* lbl)
+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
+ , _label(lbl)
+ , _block(NULL)
+ , _ublock(NULL)
+ , _stub(NULL) { }
+
+ LIR_OpBranch(LIR_Condition cond, BlockBegin* block);
+ LIR_OpBranch(LIR_Condition cond, CodeStub* stub);
+
+ // for unordered comparisons
+ LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock);
+
+ LIR_Condition cond() const {
+ return condition();
+ }
+
+ void set_cond(LIR_Condition cond) {
+ set_condition(cond);
+ }
+
+ Label* label() const { return _label; }
+ BlockBegin* block() const { return _block; }
+ BlockBegin* ublock() const { return _ublock; }
+ CodeStub* stub() const { return _stub; }
+
+ void change_block(BlockBegin* b);
+ void change_ublock(BlockBegin* b);
+ void negate_cond();
+
+ virtual void emit_code(LIR_Assembler* masm);
+ virtual LIR_OpBranch* as_OpBranch() { return this; }
+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+
class LIR_OpAllocArray : public LIR_Op {
friend class LIR_OpVisitState;
@@ -1783,6 +1798,63 @@ class LIR_Op3: public LIR_Op {
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
};
+class LIR_Op4: public LIR_Op {
+ friend class LIR_OpVisitState;
+ protected:
+ LIR_Opr _opr1;
+ LIR_Opr _opr2;
+ LIR_Opr _opr3;
+ LIR_Opr _opr4;
+ BasicType _type;
+ LIR_Opr _tmp1;
+ LIR_Opr _tmp2;
+ LIR_Opr _tmp3;
+ LIR_Opr _tmp4;
+ LIR_Opr _tmp5;
+ LIR_Condition _condition;
+
+ public:
+ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4,
+ LIR_Opr result, BasicType type)
+ : LIR_Op(code, result, NULL)
+ , _opr1(opr1)
+ , _opr2(opr2)
+ , _opr3(opr3)
+ , _opr4(opr4)
+ , _type(type)
+ , _tmp1(LIR_OprFact::illegalOpr)
+ , _tmp2(LIR_OprFact::illegalOpr)
+ , _tmp3(LIR_OprFact::illegalOpr)
+ , _tmp4(LIR_OprFact::illegalOpr)
+ , _tmp5(LIR_OprFact::illegalOpr)
+ , _condition(condition) {
+ assert(code == lir_cmove, "code check");
+ assert(type != T_ILLEGAL, "cmove should have type");
+ }
+
+ LIR_Opr in_opr1() const { return _opr1; }
+ LIR_Opr in_opr2() const { return _opr2; }
+ LIR_Opr in_opr3() const { return _opr3; }
+ LIR_Opr in_opr4() const { return _opr4; }
+ BasicType type() const { return _type; }
+ LIR_Opr tmp1_opr() const { return _tmp1; }
+ LIR_Opr tmp2_opr() const { return _tmp2; }
+ LIR_Opr tmp3_opr() const { return _tmp3; }
+ LIR_Opr tmp4_opr() const { return _tmp4; }
+ LIR_Opr tmp5_opr() const { return _tmp5; }
+
+ LIR_Condition condition() const { return _condition; }
+ void set_condition(LIR_Condition condition) { _condition = condition; }
+
+ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; }
+ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; }
+ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; }
+ void set_in_opr4(LIR_Opr opr) { _opr4 = opr; }
+ virtual void emit_code(LIR_Assembler* masm);
+ virtual LIR_Op4* as_Op4() { return this; }
+
+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
//--------------------------------
class LabelObj: public CompilationResourceObj {
@@ -2022,6 +2094,10 @@ class LIR_List: public CompilationResourceObj {
const char * _file;
int _line;
#endif
+#ifdef RISCV
+ LIR_Opr _cmp_opr1;
+ LIR_Opr _cmp_opr2;
+#endif
public:
void append(LIR_Op* op) {
@@ -2034,6 +2110,12 @@ class LIR_List: public CompilationResourceObj {
}
#endif // PRODUCT
+#ifdef RISCV
+ set_cmp_oprs(op);
+ // on riscv, lir_cmp only records the cmp operands and is not appended
+ if (op->code() == lir_cmp) return;
+#endif
+
_operations.append(op);
#ifdef ASSERT
@@ -2050,6 +2132,10 @@ class LIR_List: public CompilationResourceObj {
void set_file_and_line(const char * file, int line);
#endif
+#ifdef RISCV
+ void set_cmp_oprs(LIR_Op* op);
+#endif
+
//---------- accessors ---------------
LIR_OpList* instructions_list() { return &_operations; }
int length() const { return _operations.length(); }
@@ -2166,8 +2252,9 @@ class LIR_List: public CompilationResourceObj {
void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info);
void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info);
- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) {
- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type));
+ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type,
+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) {
+ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type));
}
void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value,
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp
index be0a6abc2ca22f1fec53838ec73a3a68b06d3056..331db6487562d4f59ec842ede160c449f9dd772b 100644
--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp
+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,6 @@
#include "c1/c1_ValueStack.hpp"
#include "ci/ciInstance.hpp"
#include "compiler/oopMap.hpp"
-#include "gc/shared/barrierSet.hpp"
#include "runtime/os.hpp"
#include "runtime/vm_version.hpp"
@@ -104,7 +103,6 @@ PatchingStub::PatchID LIR_Assembler::patching_id(CodeEmitInfo* info) {
LIR_Assembler::LIR_Assembler(Compilation* c):
_masm(c->masm())
- , _bs(BarrierSet::barrier_set())
, _compilation(c)
, _frame_map(c->frame_map())
, _current_block(NULL)
@@ -691,10 +689,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
break;
- case lir_cmove:
- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type());
- break;
-
case lir_shl:
case lir_shr:
case lir_ushr:
@@ -756,6 +750,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
}
}
+void LIR_Assembler::emit_op4(LIR_Op4* op) {
+ switch(op->code()) {
+ case lir_cmove:
+ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4());
+ break;
+
+ default:
+ Unimplemented();
+ break;
+ }
+}
void LIR_Assembler::build_frame() {
_masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp
index f27ade60bae2869f06ae8a7d835a9f9ca8843ed1..8a50667c88ab8d40d91f82a02caba383def77038 100644
--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp
+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,13 +32,11 @@
class Compilation;
class ScopeValue;
-class BarrierSet;
class LIR_Assembler: public CompilationResourceObj {
private:
C1_MacroAssembler* _masm;
CodeStubList* _slow_case_stubs;
- BarrierSet* _bs;
Compilation* _compilation;
FrameMap* _frame_map;
@@ -186,6 +184,7 @@ class LIR_Assembler: public CompilationResourceObj {
void emit_op1(LIR_Op1* op);
void emit_op2(LIR_Op2* op);
void emit_op3(LIR_Op3* op);
+ void emit_op4(LIR_Op4* op);
void emit_opBranch(LIR_OpBranch* op);
void emit_opLabel(LIR_OpLabel* op);
void emit_arraycopy(LIR_OpArrayCopy* op);
@@ -219,8 +218,8 @@ class LIR_Assembler: public CompilationResourceObj {
void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info);
void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions
void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op);
- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type);
-
+ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type,
+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr);
void call( LIR_OpJavaCall* op, relocInfo::relocType rtype);
void ic_call( LIR_OpJavaCall* op);
void vtable_call( LIR_OpJavaCall* op);
diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp
index d2386d7cbb6234a9587c432a300f397922751e42..9f0d264f2fa17abd226f3492ac3fb2ba5a4417e6 100644
--- a/src/hotspot/share/c1/c1_LinearScan.cpp
+++ b/src/hotspot/share/c1/c1_LinearScan.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1240,11 +1240,11 @@ void LinearScan::add_register_hints(LIR_Op* op) {
break;
}
case lir_cmove: {
- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2");
- LIR_Op2* cmove = (LIR_Op2*)op;
+ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4");
+ LIR_Op4* cmove = (LIR_Op4*)op;
LIR_Opr move_from = cmove->in_opr1();
- LIR_Opr move_to = cmove->result_opr();
+ LIR_Opr move_to = cmove->result_opr();
if (move_to->is_register() && move_from->is_register()) {
Interval* from = interval_at(reg_num(move_from));
@@ -3131,6 +3131,9 @@ void LinearScan::do_linear_scan() {
}
}
+#ifndef RISCV
+ // Disable these optimizations on riscv temporarily, because they do not
+ // work when the comparison operands are bound to branches or cmoves.
{ TIME_LINEAR_SCAN(timer_optimize_lir);
EdgeMoveOptimizer::optimize(ir()->code());
@@ -3138,6 +3141,7 @@ void LinearScan::do_linear_scan() {
// check that cfg is still correct after optimizations
ir()->verify();
}
+#endif
NOT_PRODUCT(print_lir(1, "Before Code Generation", false));
NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final));
@@ -6361,14 +6365,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
// There might be a cmove inserted for profiling which depends on the same
// compare. If we change the condition of the respective compare, we have
// to take care of this cmove as well.
- LIR_Op2* prev_cmove = NULL;
+ LIR_Op4* prev_cmove = NULL;
for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) {
prev_op = instructions->at(j);
// check for the cmove
if (prev_op->code() == lir_cmove) {
- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2");
- prev_cmove = (LIR_Op2*)prev_op;
+ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4");
+ prev_cmove = (LIR_Op4*)prev_op;
assert(prev_branch->cond() == prev_cmove->condition(), "should be the same");
}
if (prev_op->code() == lir_cmp) {
diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp
index cb5c0aeb8c78008f044e0d009bddbd102fa38bbb..cff024a8afb905a8da8c1919a77156746cf31230 100644
--- a/src/hotspot/share/cds/archiveBuilder.cpp
+++ b/src/hotspot/share/cds/archiveBuilder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
#include "cds/archiveUtils.hpp"
#include "cds/cppVtables.hpp"
#include "cds/dumpAllocStats.hpp"
+#include "cds/heapShared.hpp"
#include "cds/metaspaceShared.hpp"
#include "classfile/classLoaderDataShared.hpp"
#include "classfile/symbolTable.hpp"
@@ -40,6 +41,7 @@
#include "memory/resourceArea.hpp"
#include "oops/instanceKlass.hpp"
#include "oops/objArrayKlass.hpp"
+#include "oops/objArrayOop.inline.hpp"
#include "oops/oopHandle.inline.hpp"
#include "runtime/arguments.hpp"
#include "runtime/globals_extension.hpp"
@@ -522,7 +524,8 @@ ArchiveBuilder::FollowMode ArchiveBuilder::get_follow_mode(MetaspaceClosure::Ref
if (MetaspaceShared::is_in_shared_metaspace(obj)) {
// Don't dump existing shared metadata again.
return point_to_it;
- } else if (ref->msotype() == MetaspaceObj::MethodDataType) {
+ } else if (ref->msotype() == MetaspaceObj::MethodDataType ||
+ ref->msotype() == MetaspaceObj::MethodCountersType) {
return set_to_null;
} else {
if (ref->msotype() == MetaspaceObj::ClassType) {
@@ -928,27 +931,29 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
}
// rw/ro regions only
- static void write_dump_region(const char* name, DumpRegion* region) {
+ static void log_metaspace_region(const char* name, DumpRegion* region,
+ const ArchiveBuilder::SourceObjList* src_objs) {
address region_base = address(region->base());
address region_top = address(region->top());
- write_region(name, region_base, region_top, region_base + buffer_to_runtime_delta());
+ log_region(name, region_base, region_top, region_base + buffer_to_runtime_delta());
+ log_metaspace_objects(region, src_objs);
}
#define _LOG_PREFIX PTR_FORMAT ": @@ %-17s %d"
- static void write_klass(Klass* k, address runtime_dest, const char* type_name, int bytes, Thread* current) {
+ static void log_klass(Klass* k, address runtime_dest, const char* type_name, int bytes, Thread* current) {
ResourceMark rm(current);
log_debug(cds, map)(_LOG_PREFIX " %s",
p2i(runtime_dest), type_name, bytes, k->external_name());
}
- static void write_method(Method* m, address runtime_dest, const char* type_name, int bytes, Thread* current) {
+ static void log_method(Method* m, address runtime_dest, const char* type_name, int bytes, Thread* current) {
ResourceMark rm(current);
log_debug(cds, map)(_LOG_PREFIX " %s",
p2i(runtime_dest), type_name, bytes, m->external_name());
}
// rw/ro regions only
- static void write_objects(DumpRegion* region, const ArchiveBuilder::SourceObjList* src_objs) {
+ static void log_metaspace_objects(DumpRegion* region, const ArchiveBuilder::SourceObjList* src_objs) {
address last_obj_base = address(region->base());
address last_obj_end = address(region->base());
address region_end = address(region->end());
@@ -957,7 +962,7 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
SourceObjInfo* src_info = src_objs->at(i);
address src = src_info->orig_obj();
address dest = src_info->dumped_addr();
- write_data(last_obj_base, dest, last_obj_base + buffer_to_runtime_delta());
+ log_data(last_obj_base, dest, last_obj_base + buffer_to_runtime_delta());
address runtime_dest = dest + buffer_to_runtime_delta();
int bytes = src_info->size_in_bytes();
@@ -966,21 +971,21 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
switch (type) {
case MetaspaceObj::ClassType:
- write_klass((Klass*)src, runtime_dest, type_name, bytes, current);
+ log_klass((Klass*)src, runtime_dest, type_name, bytes, current);
break;
case MetaspaceObj::ConstantPoolType:
- write_klass(((ConstantPool*)src)->pool_holder(),
+ log_klass(((ConstantPool*)src)->pool_holder(),
runtime_dest, type_name, bytes, current);
break;
case MetaspaceObj::ConstantPoolCacheType:
- write_klass(((ConstantPoolCache*)src)->constant_pool()->pool_holder(),
+ log_klass(((ConstantPoolCache*)src)->constant_pool()->pool_holder(),
runtime_dest, type_name, bytes, current);
break;
case MetaspaceObj::MethodType:
- write_method((Method*)src, runtime_dest, type_name, bytes, current);
+ log_method((Method*)src, runtime_dest, type_name, bytes, current);
break;
case MetaspaceObj::ConstMethodType:
- write_method(((ConstMethod*)src)->method(), runtime_dest, type_name, bytes, current);
+ log_method(((ConstMethod*)src)->method(), runtime_dest, type_name, bytes, current);
break;
case MetaspaceObj::SymbolType:
{
@@ -999,22 +1004,22 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
last_obj_end = dest + bytes;
}
- write_data(last_obj_base, last_obj_end, last_obj_base + buffer_to_runtime_delta());
+ log_data(last_obj_base, last_obj_end, last_obj_base + buffer_to_runtime_delta());
if (last_obj_end < region_end) {
log_debug(cds, map)(PTR_FORMAT ": @@ Misc data " SIZE_FORMAT " bytes",
p2i(last_obj_end + buffer_to_runtime_delta()),
size_t(region_end - last_obj_end));
- write_data(last_obj_end, region_end, last_obj_end + buffer_to_runtime_delta());
+ log_data(last_obj_end, region_end, last_obj_end + buffer_to_runtime_delta());
}
}
#undef _LOG_PREFIX
- // Write information about a region, whose address at dump time is [base .. top). At
+ // Log information about a region, whose address at dump time is [base .. top). At
// runtime, this region will be mapped to runtime_base. runtime_base is 0 if this
// region will be mapped at os-selected addresses (such as the bitmap region), or will
// be accessed with os::read (the header).
- static void write_region(const char* name, address base, address top, address runtime_base) {
+ static void log_region(const char* name, address base, address top, address runtime_base) {
size_t size = top - base;
base = runtime_base;
top = runtime_base + size;
@@ -1023,27 +1028,63 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
}
// open and closed archive regions
- static void write_heap_region(const char* which, GrowableArray<MemRegion> *regions) {
+ static void log_heap_regions(const char* which, GrowableArray<MemRegion> *regions) {
+#if INCLUDE_CDS_JAVA_HEAP
for (int i = 0; i < regions->length(); i++) {
address start = address(regions->at(i).start());
address end = address(regions->at(i).end());
- write_region(which, start, end, start);
- write_data(start, end, start);
+ log_region(which, start, end, start);
+
+ while (start < end) {
+ size_t byte_size;
+ oop archived_oop = cast_to_oop(start);
+ oop original_oop = HeapShared::get_original_object(archived_oop);
+ if (original_oop != NULL) {
+ ResourceMark rm;
+ log_info(cds, map)(PTR_FORMAT ": @@ Object %s",
+ p2i(start), original_oop->klass()->external_name());
+ byte_size = original_oop->size() * BytesPerWord;
+ } else if (archived_oop == HeapShared::roots()) {
+ // HeapShared::roots() is copied specially so it doesn't exist in
+ // HeapShared::OriginalObjectTable. See HeapShared::copy_roots().
+ log_info(cds, map)(PTR_FORMAT ": @@ Object HeapShared:roots (ObjArray)",
+ p2i(start));
+ byte_size = objArrayOopDesc::object_size(HeapShared::roots()->length()) * BytesPerWord;
+ } else {
+ // We have reached the end of the region
+ break;
+ }
+ address oop_end = start + byte_size;
+ log_data(start, oop_end, start, /*is_heap=*/true);
+ start = oop_end;
+ }
+ if (start < end) {
+ log_info(cds, map)(PTR_FORMAT ": @@ Unused heap space " SIZE_FORMAT " bytes",
+ p2i(start), size_t(end - start));
+ log_data(start, end, start, /*is_heap=*/true);
+ }
}
+#endif
}
- // Dump all the data [base...top). Pretend that the base address
+ // Log all the data [base...top). Pretend that the base address
// will be mapped to runtime_base at run-time.
- static void write_data(address base, address top, address runtime_base) {
+ static void log_data(address base, address top, address runtime_base, bool is_heap = false) {
assert(top >= base, "must be");
LogStreamHandle(Trace, cds, map) lsh;
if (lsh.is_enabled()) {
- os::print_hex_dump(&lsh, base, top, sizeof(address), 32, runtime_base);
+ int unitsize = sizeof(address);
+ if (is_heap && UseCompressedOops) {
+ // This makes the compressed oop pointers easier to read, but
+ // longs and doubles will be split into two words.
+ unitsize = sizeof(narrowOop);
+ }
+ os::print_hex_dump(&lsh, base, top, unitsize, 32, runtime_base);
}
}
- static void write_header(FileMapInfo* mapinfo) {
+ static void log_header(FileMapInfo* mapinfo) {
LogStreamHandle(Info, cds, map) lsh;
if (lsh.is_enabled()) {
mapinfo->print(&lsh);
@@ -1051,41 +1092,38 @@ class ArchiveBuilder::CDSMapLogger : AllStatic {
}
public:
- static void write(ArchiveBuilder* builder, FileMapInfo* mapinfo,
- GrowableArray<MemRegion> *closed_heap_regions,
- GrowableArray<MemRegion> *open_heap_regions,
- char* bitmap, size_t bitmap_size_in_bytes) {
+ static void log(ArchiveBuilder* builder, FileMapInfo* mapinfo,
+ GrowableArray<MemRegion> *closed_heap_regions,
+ GrowableArray<MemRegion> *open_heap_regions,
+ char* bitmap, size_t bitmap_size_in_bytes) {
log_info(cds, map)("%s CDS archive map for %s", DumpSharedSpaces ? "Static" : "Dynamic", mapinfo->full_path());
address header = address(mapinfo->header());
address header_end = header + mapinfo->header()->header_size();
- write_region("header", header, header_end, 0);
- write_header(mapinfo);
- write_data(header, header_end, 0);
+ log_region("header", header, header_end, 0);
+ log_header(mapinfo);
+ log_data(header, header_end, 0);
DumpRegion* rw_region = &builder->_rw_region;
DumpRegion* ro_region = &builder->_ro_region;
- write_dump_region("rw region", rw_region);
- write_objects(rw_region, &builder->_rw_src_objs);
-
- write_dump_region("ro region", ro_region);
- write_objects(ro_region, &builder->_ro_src_objs);
+ log_metaspace_region("rw region", rw_region, &builder->_rw_src_objs);
+ log_metaspace_region("ro region", ro_region, &builder->_ro_src_objs);
address bitmap_end = address(bitmap + bitmap_size_in_bytes);
- write_region("bitmap", address(bitmap), bitmap_end, 0);
- write_data(header, header_end, 0);
+ log_region("bitmap", address(bitmap), bitmap_end, 0);
+ log_data((address)bitmap, bitmap_end, 0);
if (closed_heap_regions != NULL) {
- write_heap_region("closed heap region", closed_heap_regions);
+ log_heap_regions("closed heap region", closed_heap_regions);
}
if (open_heap_regions != NULL) {
- write_heap_region("open heap region", open_heap_regions);
+ log_heap_regions("open heap region", open_heap_regions);
}
log_info(cds, map)("[End of CDS archive map]");
}
-};
+}; // end ArchiveBuilder::CDSMapLogger
void ArchiveBuilder::print_stats() {
_alloc_stats.print_stats(int(_ro_region.used()), int(_rw_region.used()));
@@ -1139,9 +1177,10 @@ void ArchiveBuilder::write_archive(FileMapInfo* mapinfo,
}
if (log_is_enabled(Info, cds, map)) {
- CDSMapLogger::write(this, mapinfo, closed_heap_regions, open_heap_regions,
- bitmap, bitmap_size_in_bytes);
+ CDSMapLogger::log(this, mapinfo, closed_heap_regions, open_heap_regions,
+ bitmap, bitmap_size_in_bytes);
}
+ CDS_JAVA_HEAP_ONLY(HeapShared::destroy_archived_object_cache());
FREE_C_HEAP_ARRAY(char, bitmap);
}
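
The new log_heap_regions() walk above steps through an archive heap region object by object, asking a side table for each archived copy's original object so it can print its class name and size. A rough stand-alone sketch of that walking pattern, using a std::unordered_map in place of HeapShared::OriginalObjectTable and made-up object records:

    // Walk a contiguous region object by object; a side table maps each copy's
    // offset back to its source object so we can log name and size.
    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    struct Obj { const char* name; size_t byte_size; };

    int main() {
      std::vector<Obj> region = {{"java/lang/String", 24}, {"[B", 40}};
      std::unordered_map<size_t, const Obj*> original;  // offset -> source object
      size_t off = 0;
      for (const Obj& o : region) { original[off] = &o; off += o.byte_size; }

      size_t cur = 0, end = off;
      while (cur < end) {
        auto it = original.find(cur);
        if (it == original.end()) break;          // unknown object: stop the walk
        std::printf("%zu: @@ Object %s\n", cur, it->second->name);
        cur += it->second->byte_size;             // advance by the object's size
      }
      if (cur < end)
        std::printf("%zu: @@ Unused heap space %zu bytes\n", cur, end - cur);
      return 0;
    }
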
diff --git a/src/hotspot/share/cds/archiveUtils.hpp b/src/hotspot/share/cds/archiveUtils.hpp
index 588ad1b6da921152f1caaf4827504286d2e969ba..be8d8a0e84ed5add863781a91217db12e9fce2ee 100644
--- a/src/hotspot/share/cds/archiveUtils.hpp
+++ b/src/hotspot/share/cds/archiveUtils.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
#include "memory/virtualspace.hpp"
#include "utilities/bitMap.hpp"
#include "utilities/exceptions.hpp"
+#include "utilities/macros.hpp"
class BootstrapInfo;
class ReservedSpace;
@@ -147,7 +148,7 @@ public:
char* expand_top_to(char* newtop);
char* allocate(size_t num_bytes);
- void append_intptr_t(intptr_t n, bool need_to_mark = false);
+ void append_intptr_t(intptr_t n, bool need_to_mark = false) NOT_CDS_RETURN;
char* base() const { return _base; }
char* top() const { return _top; }
diff --git a/src/hotspot/share/cds/dumpTimeClassInfo.hpp b/src/hotspot/share/cds/dumpTimeClassInfo.hpp
index 80f6d81442c6d407c560ed29313be34286001407..5b4f5cd9b9beb1f494c5a2a68b8fba2aae757657 100644
--- a/src/hotspot/share/cds/dumpTimeClassInfo.hpp
+++ b/src/hotspot/share/cds/dumpTimeClassInfo.hpp
@@ -167,7 +167,8 @@ public:
size_t runtime_info_bytesize() const;
};
-inline unsigned DumpTimeSharedClassTable_hash(InstanceKlass* const& k) {
+template <typename T>
+inline unsigned DumpTimeSharedClassTable_hash(T* const& k) {
if (DumpSharedSpaces) {
// Deterministic archive contents
uintx delta = k->name() - MetaspaceShared::symbol_rs_base();
@@ -175,7 +176,7 @@ inline unsigned DumpTimeSharedClassTable_hash(InstanceKlass* const& k) {
} else {
// Deterministic archive is not possible because classes can be loaded
// in multiple threads.
- return primitive_hash<InstanceKlass*>(k);
+ return primitive_hash<T*>(k);
}
}
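
The hash above is made a template so the same deterministic scheme works for more key types: when dumping, it hashes a delta from a fixed base rather than a raw pointer, keeping archive contents stable under ASLR. A loose sketch of that idea (the mixer and names below are illustrative stand-ins, not HotSpot's primitive_hash):

    // Deterministic-at-dump-time hashing: hash an offset from a common base so
    // the result does not depend on where memory happens to be mapped.
    #include <cstdint>
    #include <cstdio>

    static unsigned mix_hash(uint64_t v) {            // simple stand-in mixer
      v ^= v >> 33; v *= 0xff51afd7ed558ccdULL; v ^= v >> 33;
      return (unsigned)v;
    }

    template <typename T>
    unsigned deterministic_hash(T* k, const char* base, bool dumping) {
      if (dumping) {
        uint64_t delta = (uint64_t)((const char*)k - base);  // stable across runs
        return mix_hash(delta);
      }
      return mix_hash((uint64_t)(uintptr_t)k);        // runtime: plain pointer hash
    }

    int main() {
      char arena[64];
      int* k = (int*)(arena + 16);
      std::printf("%u\n", deterministic_hash(k, arena, true));
      return 0;
    }
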
diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp
index 7901ab855ce891e0ed4aeffba19c1b187a5bf802..efa0fd8275ad07a5e293abb72317b559a2fb7627 100644
--- a/src/hotspot/share/cds/filemap.cpp
+++ b/src/hotspot/share/cds/filemap.cpp
@@ -166,10 +166,9 @@ template <int N> static void get_header_version(char (&header_version) [N]) {
assert(header_version[JVM_IDENT_MAX-1] == 0, "must be");
}
-FileMapInfo::FileMapInfo(const char* full_path, bool is_static) {
- memset((void*)this, 0, sizeof(FileMapInfo));
- _full_path = full_path;
- _is_static = is_static;
+FileMapInfo::FileMapInfo(const char* full_path, bool is_static) :
+ _is_static(is_static), _file_open(false), _is_mapped(false), _fd(-1), _file_offset(0),
+ _full_path(full_path), _base_archive_name(nullptr), _header(nullptr) {
if (_is_static) {
assert(_current_info == NULL, "must be singleton"); // not thread safe
_current_info = this;
@@ -177,8 +176,6 @@ FileMapInfo::FileMapInfo(const char* full_path, bool is_static) {
assert(_dynamic_archive_info == NULL, "must be singleton"); // not thread safe
_dynamic_archive_info = this;
}
- _file_offset = 0;
- _file_open = false;
}
FileMapInfo::~FileMapInfo() {
@@ -189,6 +186,11 @@ FileMapInfo::~FileMapInfo() {
assert(_dynamic_archive_info == this, "must be singleton"); // not thread safe
_dynamic_archive_info = NULL;
}
+
+ if (_header != nullptr) {
+ os::free(_header);
+ }
+
if (_file_open) {
::close(_fd);
}
@@ -1108,6 +1110,9 @@ public:
}
~FileHeaderHelper() {
+ if (_header != nullptr) {
+ FREE_C_HEAP_ARRAY(char, _header);
+ }
if (_fd != -1) {
::close(_fd);
}
@@ -2327,6 +2332,11 @@ void FileMapInfo::fixup_mapped_heap_regions() {
"Null closed_heap_regions array with non-zero count");
G1CollectedHeap::heap()->fill_archive_regions(closed_heap_regions,
num_closed_heap_regions);
+ // G1 marking uses the BOT for object chunking during marking in
+ // G1CMObjArrayProcessor::process_slice(); for this reason we need to
+ // initialize the BOT for closed archive regions too.
+ G1CollectedHeap::heap()->populate_archive_regions_bot_part(closed_heap_regions,
+ num_closed_heap_regions);
}
// do the same for mapped open archive heap regions
@@ -2339,11 +2349,6 @@ void FileMapInfo::fixup_mapped_heap_regions() {
// fast G1BlockOffsetTablePart::block_start operations for any given address
// within the open archive regions when trying to find start of an object
// (e.g. during card table scanning).
- //
- // This is only needed for open archive regions but not the closed archive
- // regions, because objects in closed archive regions never reference objects
- // outside the closed archive regions and they are immutable. So we never
- // need their BOT during garbage collection.
G1CollectedHeap::heap()->populate_archive_regions_bot_part(open_heap_regions,
num_open_heap_regions);
}
diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp
index 26a852b01589a804cf76409c7844a3cbaf7ff272..fa387a73d1d91def1cebf146c468625269138c71 100644
--- a/src/hotspot/share/cds/heapShared.cpp
+++ b/src/hotspot/share/cds/heapShared.cpp
@@ -51,8 +51,9 @@
#include "memory/universe.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/fieldStreams.inline.hpp"
-#include "oops/objArrayOop.hpp"
+#include "oops/objArrayOop.inline.hpp"
#include "oops/oop.inline.hpp"
+#include "oops/typeArrayOop.inline.hpp"
#include "prims/jvmtiExport.hpp"
#include "runtime/fieldDescriptor.inline.hpp"
#include "runtime/globals_extension.hpp"
@@ -213,6 +214,7 @@ void HeapShared::reset_archived_object_states(TRAPS) {
}
HeapShared::ArchivedObjectCache* HeapShared::_archived_object_cache = NULL;
+HeapShared::OriginalObjectTable* HeapShared::_original_object_table = NULL;
oop HeapShared::find_archived_heap_object(oop obj) {
assert(DumpSharedSpaces, "dump-time only");
ArchivedObjectCache* cache = archived_object_cache();
@@ -317,6 +319,9 @@ oop HeapShared::archive_object(oop obj) {
ArchivedObjectCache* cache = archived_object_cache();
CachedOopInfo info = make_cached_oop_info(archived_oop);
cache->put(obj, info);
+ if (_original_object_table != NULL) {
+ _original_object_table->put(archived_oop, obj);
+ }
if (log_is_enabled(Debug, cds, heap)) {
ResourceMark rm;
log_debug(cds, heap)("Archived heap object " PTR_FORMAT " ==> " PTR_FORMAT " : %s",
@@ -466,7 +471,7 @@ void HeapShared::archive_objects(GrowableArray<MemRegion>* closed_regions,
NoSafepointVerifier nsv;
// Cache for recording where the archived objects are copied to
- create_archived_object_cache();
+ create_archived_object_cache(log_is_enabled(Info, cds, map));
log_info(cds)("Heap range = [" PTR_FORMAT " - " PTR_FORMAT "]",
UseCompressedOops ? p2i(CompressedOops::begin()) :
@@ -480,7 +485,6 @@ void HeapShared::archive_objects(GrowableArray<MemRegion>* closed_regions,
copy_open_objects(open_regions);
CDSHeapVerifier::verify();
- destroy_archived_object_cache();
}
G1HeapVerifier::verify_archive_regions();
@@ -532,6 +536,12 @@ void HeapShared::copy_open_objects(GrowableArray<MemRegion>* open_regions) {
// Copy _pending_archive_roots into an objArray
void HeapShared::copy_roots() {
+ // HeapShared::roots() points into an ObjArray in the open archive region. A portion of the
+ // objects in this array are discovered during HeapShared::archive_objects(). For example,
+ // in HeapShared::archive_reachable_objects_from() -> HeapShared::check_enum_obj().
+ // However, HeapShared::archive_objects() happens inside a safepoint, so we can't
+ // allocate a "regular" ObjArray and pass the result to HeapShared::archive_object().
+ // Instead, we have to roll our own alloc/copy routine here.
int length = _pending_roots != NULL ? _pending_roots->length() : 0;
size_t size = objArrayOopDesc::object_size(length);
Klass* k = Universe::objectArrayKlassObj(); // already relocated to point to archived klass
@@ -574,7 +584,7 @@ KlassSubGraphInfo* HeapShared::init_subgraph_info(Klass* k, bool is_full_module_
bool created;
Klass* relocated_k = ArchiveBuilder::get_relocated_klass(k);
KlassSubGraphInfo* info =
- _dump_time_subgraph_info_table->put_if_absent(relocated_k, KlassSubGraphInfo(relocated_k, is_full_module_graph),
+ _dump_time_subgraph_info_table->put_if_absent(k, KlassSubGraphInfo(relocated_k, is_full_module_graph),
&created);
assert(created, "must not initialize twice");
return info;
@@ -582,8 +592,7 @@ KlassSubGraphInfo* HeapShared::init_subgraph_info(Klass* k, bool is_full_module_
KlassSubGraphInfo* HeapShared::get_subgraph_info(Klass* k) {
assert(DumpSharedSpaces, "dump time only");
- Klass* relocated_k = ArchiveBuilder::get_relocated_klass(k);
- KlassSubGraphInfo* info = _dump_time_subgraph_info_table->get(relocated_k);
+ KlassSubGraphInfo* info = _dump_time_subgraph_info_table->get(k);
assert(info != NULL, "must have been initialized");
return info;
}
@@ -744,7 +753,8 @@ struct CopyKlassSubGraphInfoToArchive : StackObj {
(ArchivedKlassSubGraphInfoRecord*)ArchiveBuilder::ro_region_alloc(sizeof(ArchivedKlassSubGraphInfoRecord));
record->init(&info);
- unsigned int hash = SystemDictionaryShared::hash_for_shared_dictionary((address)klass);
+ Klass* relocated_k = ArchiveBuilder::get_relocated_klass(klass);
+ unsigned int hash = SystemDictionaryShared::hash_for_shared_dictionary((address)relocated_k);
u4 delta = ArchiveBuilder::current()->any_to_offset_u4(record);
_writer->add(hash, delta);
}
diff --git a/src/hotspot/share/cds/heapShared.hpp b/src/hotspot/share/cds/heapShared.hpp
index 74d1f1b6e7eb8c2531bbf82ffad9828bedad5671..402f451de4c7bde2190113ee59768f8a48e0fbd0 100644
--- a/src/hotspot/share/cds/heapShared.hpp
+++ b/src/hotspot/share/cds/heapShared.hpp
@@ -25,6 +25,7 @@
#ifndef SHARE_CDS_HEAPSHARED_HPP
#define SHARE_CDS_HEAPSHARED_HPP
+#include "cds/dumpTimeClassInfo.hpp"
#include "cds/metaspaceShared.hpp"
#include "classfile/compactHashtable.hpp"
#include "classfile/javaClasses.hpp"
@@ -246,23 +247,25 @@ private:
}
typedef ResourceHashtable ArchivedObjectCache;
static ArchivedObjectCache* _archived_object_cache;
- static unsigned klass_hash(Klass* const& klass) {
- // Generate deterministic hashcode even if SharedBaseAddress is changed due to ASLR.
- return primitive_hash(address(klass) - SharedBaseAddress);
- }
+ typedef ResourceHashtable OriginalObjectTable;
+ static OriginalObjectTable* _original_object_table;
class DumpTimeKlassSubGraphInfoTable
: public ResourceHashtable {
+ DumpTimeSharedClassTable_hash> {
public:
int _count;
};
@@ -379,17 +382,36 @@ private:
static void fill_failed_loaded_region();
public:
static void reset_archived_object_states(TRAPS);
- static void create_archived_object_cache() {
+ static void create_archived_object_cache(bool create_orig_table) {
_archived_object_cache =
new (ResourceObj::C_HEAP, mtClass)ArchivedObjectCache();
+ if (create_orig_table) {
+ _original_object_table =
+ new (ResourceObj::C_HEAP, mtClass)OriginalObjectTable();
+ } else {
+ _original_object_table = NULL;
+ }
}
static void destroy_archived_object_cache() {
delete _archived_object_cache;
_archived_object_cache = NULL;
+ if (_original_object_table != NULL) {
+ delete _original_object_table;
+ _original_object_table = NULL;
+ }
}
static ArchivedObjectCache* archived_object_cache() {
return _archived_object_cache;
}
+ static oop get_original_object(oop archived_object) {
+ assert(_original_object_table != NULL, "sanity");
+ oop* r = _original_object_table->get(archived_object);
+ if (r == NULL) {
+ return NULL;
+ } else {
+ return *r;
+ }
+ }
static oop find_archived_heap_object(oop obj);
static oop archive_object(oop obj);
diff --git a/src/hotspot/share/ci/ciSymbols.hpp b/src/hotspot/share/ci/ciSymbols.hpp
index df77b90148fdbe1207c336f5ff19e96d3a060dd3..244d7196cced253382490e9dfca3deb1805da00c 100644
--- a/src/hotspot/share/ci/ciSymbols.hpp
+++ b/src/hotspot/share/ci/ciSymbols.hpp
@@ -40,4 +40,4 @@ class ciSymbols {
};
-#endif // SHARE_CI_CISYMBOLS_HPP
\ No newline at end of file
+#endif // SHARE_CI_CISYMBOLS_HPP
diff --git a/src/hotspot/share/ci/ciTypeFlow.cpp b/src/hotspot/share/ci/ciTypeFlow.cpp
index 6ce6325cf91ff371aa4f643a3e84caf77f6e2328..e4ffd7dd9fe16719080af67dc347d938d9b5b2f1 100644
--- a/src/hotspot/share/ci/ciTypeFlow.cpp
+++ b/src/hotspot/share/ci/ciTypeFlow.cpp
@@ -2460,7 +2460,7 @@ int ciTypeFlow::profiled_count(ciTypeFlow::Loop* loop) {
return 0;
}
ciTypeFlow::Block* tail = loop->tail();
- if (tail->control() == -1) {
+ if (tail->control() == -1 || tail->has_trap()) {
return 0;
}
diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp
index ae76dea73aa8fb73429e104aa9f0d814ef6bb41e..51596a02a12b5cac702603d722b2216c62c93e49 100644
--- a/src/hotspot/share/classfile/javaClasses.cpp
+++ b/src/hotspot/share/classfile/javaClasses.cpp
@@ -454,16 +454,18 @@ char* java_lang_String::as_platform_dependent_str(Handle java_string, TRAPS) {
}
char *native_platform_string;
- { JavaThread* thread = THREAD;
- jstring js = (jstring) JNIHandles::make_local(thread, java_string());
- bool is_copy;
+ JavaThread* thread = THREAD;
+ jstring js = (jstring) JNIHandles::make_local(thread, java_string());
+ {
HandleMark hm(thread);
ThreadToNativeFromVM ttn(thread);
JNIEnv *env = thread->jni_environment();
+ bool is_copy;
native_platform_string = (_to_platform_string_fn)(env, js, &is_copy);
assert(is_copy == JNI_TRUE, "is_copy value changed");
- JNIHandles::destroy_local(js);
}
+ JNIHandles::destroy_local(js);
+
return native_platform_string;
}
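
In the hunk above, the JNI local handle is now created before the ThreadToNativeFromVM scope and destroyed only after that scope has been left, so the destroy runs while the thread is back in the VM. A small sketch of the ordering with stand-in types (not the HotSpot classes):

    // Ordering sketch: make the handle, enter the native scope for the call,
    // leave the scope, and only then destroy the handle.
    #include <cstdio>

    struct ThreadToNative {             // models ThreadToNativeFromVM
      ThreadToNative()  { std::puts("enter native"); }
      ~ThreadToNative() { std::puts("leave native"); }
    };

    int main() {
      int handle = 42;                  // models JNIHandles::make_local(...)
      {
        ThreadToNative ttn;             // state transition limited to this block
        std::printf("call platform string fn with handle %d\n", handle);
      }                                 // back "in VM" here
      std::puts("destroy local handle"); // models JNIHandles::destroy_local(js)
      return 0;
    }
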
diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp
index cc3dc1ebdccf58fc689140cfcc02f432ec07dfef..a329669bed3d25b0a9284b4cce4482581faa9754 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.cpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.cpp
@@ -229,7 +229,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
case vmIntrinsics::_loadFence:
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence:
- case vmIntrinsics::_hasNegatives:
+ case vmIntrinsics::_countPositives:
case vmIntrinsics::_Reference_get:
break;
default:
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index 7c3cb1d3f10235bcbb23d3363b930f98953a647d..b2757ca86bc856dc520822f0b942214e0a235759 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -134,6 +134,7 @@ class methodHandle;
do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \
do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \
do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \
+ do_name(round_name, "round") \
\
do_name(addExact_name,"addExact") \
do_name(decrementExact_name,"decrementExact") \
@@ -185,6 +186,8 @@ class methodHandle;
do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \
do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \
do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \
+ do_intrinsic(_roundD, java_lang_Math, round_name, double_long_signature, F_S) \
+ do_intrinsic(_roundF, java_lang_Math, round_name, float_int_signature, F_S) \
do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \
do_intrinsic(_fcopySign, java_lang_Math, copySign_name, float2_float_signature, F_S) \
do_intrinsic(_dsignum, java_lang_Math, signum_name, double_double_signature, F_S) \
@@ -354,9 +357,9 @@ class methodHandle;
do_signature(Preconditions_checkLongIndex_signature, "(JJLjava/util/function/BiFunction;)J") \
\
do_class(java_lang_StringCoding, "java/lang/StringCoding") \
- do_intrinsic(_hasNegatives, java_lang_StringCoding, hasNegatives_name, hasNegatives_signature, F_S) \
- do_name( hasNegatives_name, "hasNegatives") \
- do_signature(hasNegatives_signature, "([BII)Z") \
+ do_intrinsic(_countPositives, java_lang_StringCoding, countPositives_name, countPositives_signature, F_S) \
+ do_name( countPositives_name, "countPositives") \
+ do_signature(countPositives_signature, "([BII)I") \
\
do_class(sun_nio_cs_iso8859_1_Encoder, "sun/nio/cs/ISO_8859_1$Encoder") \
do_intrinsic(_encodeISOArray, sun_nio_cs_iso8859_1_Encoder, encodeISOArray_name, encodeISOArray_signature, F_S) \
@@ -459,9 +462,8 @@ class methodHandle;
\
/* support for sun.security.provider.DigestBase */ \
do_class(sun_security_provider_digestbase, "sun/security/provider/DigestBase") \
- do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R) \
+ do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, countPositives_signature, F_R) \
do_name( implCompressMB_name, "implCompressMultiBlock0") \
- do_signature(implCompressMB_signature, "([BII)I") \
\
/* support for java.util.Base64.Encoder*/ \
do_class(java_util_Base64_Encoder, "java/util/Base64$Encoder") \
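
The rename above tracks java.lang.StringCoding: hasNegatives with signature ([BII)Z becomes countPositives with signature ([BII)I, reporting how many leading bytes are non-negative instead of a plain boolean. A sketch of that basic semantics; the real JDK method's contract has extra leeway that is not modeled here, and this is not the HotSpot intrinsic itself:

    // countPositives-style scan: number of leading non-negative bytes in a range.
    #include <cstdio>

    static int count_positives(const signed char* a, int off, int len) {
      for (int i = 0; i < len; i++) {
        if (a[off + i] < 0) return i;  // index of the first negative byte
      }
      return len;                      // no negative byte in the range
    }

    int main() {
      const signed char bytes[] = {'a', 'b', -61, 'd'};  // -61 is the byte 0xC3
      std::printf("%d\n", count_positives(bytes, 0, 4)); // prints 2
      return 0;
    }
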
diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp
index e0402392467e2b9c9a19196798b5d755e569844e..f08afd2ce767326db10b7e08ce95bb4feb438037 100644
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@@ -465,8 +465,6 @@
template(security_manager_signature, "Ljava/lang/SecurityManager;") \
template(defineOrCheckPackage_name, "defineOrCheckPackage") \
template(defineOrCheckPackage_signature, "(Ljava/lang/String;Ljava/util/jar/Manifest;Ljava/net/URL;)Ljava/lang/Package;") \
- template(fileToEncodedURL_name, "fileToEncodedURL") \
- template(fileToEncodedURL_signature, "(Ljava/io/File;)Ljava/net/URL;") \
template(getProtectionDomain_name, "getProtectionDomain") \
template(getProtectionDomain_signature, "(Ljava/security/CodeSource;)Ljava/security/ProtectionDomain;") \
template(java_lang_Integer_array_signature, "[Ljava/lang/Integer;") \
@@ -561,6 +559,7 @@
template(char_array_void_signature, "([C)V") \
template(int_int_void_signature, "(II)V") \
template(long_long_void_signature, "(JJ)V") \
+ template(void_byte_array_signature, "()[B") \
template(void_classloader_signature, "()Ljava/lang/ClassLoader;") \
template(void_object_signature, "()Ljava/lang/Object;") \
template(void_class_signature, "()Ljava/lang/Class;") \
@@ -683,7 +682,6 @@
template(appendToClassPathForInstrumentation_name, "appendToClassPathForInstrumentation") \
do_alias(appendToClassPathForInstrumentation_signature, string_void_signature) \
template(serializePropertiesToByteArray_name, "serializePropertiesToByteArray") \
- template(serializePropertiesToByteArray_signature, "()[B") \
template(serializeAgentPropertiesToByteArray_name, "serializeAgentPropertiesToByteArray") \
template(classRedefinedCount_name, "classRedefinedCount") \
template(classLoader_name, "classLoader") \
diff --git a/src/hotspot/share/code/debugInfo.hpp b/src/hotspot/share/code/debugInfo.hpp
index 3f213783a218afc6b24effa529fde4e9e0630d15..7c85ae60c33fa66d905dbb4e0271143eaa47d602 100644
--- a/src/hotspot/share/code/debugInfo.hpp
+++ b/src/hotspot/share/code/debugInfo.hpp
@@ -29,7 +29,6 @@
#include "code/location.hpp"
#include "code/nmethod.hpp"
#include "code/oopRecorder.hpp"
-#include "runtime/stackValue.hpp"
#include "runtime/thread.hpp"
#include "utilities/growableArray.hpp"
diff --git a/src/hotspot/share/code/exceptionHandlerTable.cpp b/src/hotspot/share/code/exceptionHandlerTable.cpp
index f3c2a7870445877fb5fb7bd8d82b6b914393d815..d43d18e4b2b53cdc30a8044dd6833c1be38ed801 100644
--- a/src/hotspot/share/code/exceptionHandlerTable.cpp
+++ b/src/hotspot/share/code/exceptionHandlerTable.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -68,7 +68,7 @@ ExceptionHandlerTable::ExceptionHandlerTable(int initial_size) {
ExceptionHandlerTable::ExceptionHandlerTable(const CompiledMethod* cm) {
_table = (HandlerTableEntry*)cm->handler_table_begin();
_length = cm->handler_table_size() / sizeof(HandlerTableEntry);
- _size = 0; // no space allocated by ExeptionHandlerTable!
+ _size = 0; // no space allocated by ExceptionHandlerTable!
}
diff --git a/src/hotspot/share/compiler/compilerDefinitions.cpp b/src/hotspot/share/compiler/compilerDefinitions.cpp
index aa8dd0a1be8638c9bc4519177af24624da36744c..1bbff8fb77bcc5ce45d9ba52ca11aec1ad321da2 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.cpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.cpp
@@ -124,10 +124,17 @@ intx CompilerConfig::scaled_freq_log(intx freq_log) {
// Returns threshold scaled with the value of scale.
// If scale < 0.0, threshold is returned without scaling.
intx CompilerConfig::scaled_compile_threshold(intx threshold, double scale) {
+ assert(threshold >= 0, "must be");
if (scale == 1.0 || scale < 0.0) {
return threshold;
} else {
- return (intx)(threshold * scale);
+ double v = threshold * scale;
+ assert(v >= 0, "must be");
+ if (v > max_intx) {
+ return max_intx;
+ } else {
+ return (intx)(v);
+ }
}
}
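
scaled_compile_threshold() above now computes the scaled value in double precision and saturates at max_intx, so a large scale can no longer overflow the cast back to intx. A tiny stand-alone sketch of the same clamp, using int64_t in place of intx:

    // Scale in double precision and saturate instead of overflowing the cast.
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    static int64_t scaled_threshold(int64_t threshold, double scale) {
      if (scale == 1.0 || scale < 0.0) return threshold;
      const double v = threshold * scale;
      const double max_value = (double)std::numeric_limits<int64_t>::max();
      return v >= max_value ? std::numeric_limits<int64_t>::max() : (int64_t)v;
    }

    int main() {
      std::printf("%lld\n", (long long)scaled_threshold(10000, 2.0));      // 20000
      std::printf("%lld\n", (long long)scaled_threshold(INT64_MAX, 4.0));  // clamped
      return 0;
    }
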
diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.cpp b/src/hotspot/share/gc/g1/g1BarrierSet.cpp
index 994bf16c2e30b5e816c2b5ea41d36ed6f25f11ab..a04c7370518df6b06ea419fa1faaf2c2c7be8bc4 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp
@@ -61,13 +61,6 @@ G1BarrierSet::G1BarrierSet(G1CardTable* card_table) :
_dirty_card_queue_set(&_dirty_card_queue_buffer_allocator)
{}
-void G1BarrierSet::enqueue(oop pre_val) {
- // Nulls should have been already filtered.
- assert(oopDesc::is_oop(pre_val, true), "Error");
- SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(Thread::current());
- G1BarrierSet::satb_mark_queue_set().enqueue(queue, pre_val);
-}
-
template <class T> void
G1BarrierSet::write_ref_array_pre_work(T* dst, size_t count) {
G1SATBMarkQueueSet& queue_set = G1BarrierSet::satb_mark_queue_set();
diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.hpp b/src/hotspot/share/gc/g1/g1BarrierSet.hpp
index e8eb6e9c9d1dfe252999693f54b7b66b0ae04798..d90d83298c9d2a1036ca04109c1eb39806766c97 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSet.hpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.hpp
@@ -56,10 +56,12 @@ class G1BarrierSet: public CardTableBarrierSet {
}
// Add "pre_val" to a set of objects that may have been disconnected from the
- // pre-marking object graph.
- static void enqueue(oop pre_val);
+ // pre-marking object graph. Prefer the version that takes a location, as it
+ // can avoid touching the heap unnecessarily.
+ template <class T> static void enqueue(T* dst);
+ static void enqueue_preloaded(oop pre_val);
- static void enqueue_if_weak(DecoratorSet decorators, oop value);
+ static void enqueue_preloaded_if_weak(DecoratorSet decorators, oop value);
template <class T> void write_ref_array_pre_work(T* dst, size_t count);
virtual void write_ref_array_pre(oop* dst, size_t count, bool dest_uninitialized);
diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp b/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
index 8fa29544ca4338a99fbb3aac848d242b312ad827..f7a6fd3ee5a37a052058a818b9c0ddc9868b2e3c 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
@@ -28,11 +28,35 @@
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/accessBarrierSupport.inline.hpp"
#include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.hpp"
+inline void G1BarrierSet::enqueue_preloaded(oop pre_val) {
+ // Nulls should have been already filtered.
+ assert(oopDesc::is_oop(pre_val, true), "Error");
+
+ G1SATBMarkQueueSet& queue_set = G1BarrierSet::satb_mark_queue_set();
+ if (!queue_set.is_active()) return;
+
+ SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(Thread::current());
+ queue_set.enqueue_known_active(queue, pre_val);
+}
+
+template <class T>
+inline void G1BarrierSet::enqueue(T* dst) {
+ G1SATBMarkQueueSet& queue_set = G1BarrierSet::satb_mark_queue_set();
+ if (!queue_set.is_active()) return;
+
+ T heap_oop = RawAccess::oop_load(dst);
+ if (!CompressedOops::is_null(heap_oop)) {
+ SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(Thread::current());
+ queue_set.enqueue_known_active(queue, CompressedOops::decode_not_null(heap_oop));
+ }
+}
+
template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_pre(T* field) {
if (HasDecorator<decorators, IS_DEST_UNINITIALIZED>::value ||
@@ -40,10 +64,7 @@ inline void G1BarrierSet::write_ref_field_pre(T* field) {
return;
}
- T heap_oop = RawAccess::oop_load(field);
- if (!CompressedOops::is_null(heap_oop)) {
- enqueue(CompressedOops::decode_not_null(heap_oop));
- }
+ enqueue(field);
}
template <DecoratorSet decorators, typename T>
@@ -55,7 +76,7 @@ inline void G1BarrierSet::write_ref_field_post(T* field, oop new_val) {
}
}
-inline void G1BarrierSet::enqueue_if_weak(DecoratorSet decorators, oop value) {
+inline void G1BarrierSet::enqueue_preloaded_if_weak(DecoratorSet decorators, oop value) {
assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Reference strength must be known");
// Loading from a weak or phantom reference needs enqueueing, as
// the object may not have been reachable (part of the snapshot)
@@ -65,7 +86,7 @@ inline void G1BarrierSet::enqueue_if_weak(DecoratorSet decorators, oop value) {
const bool needs_enqueue = (!peek && !on_strong_oop_ref);
if (needs_enqueue && value != NULL) {
- enqueue(value);
+ enqueue_preloaded(value);
}
}
@@ -74,7 +95,7 @@ template
inline oop G1BarrierSet::AccessBarrier<decorators, BarrierSetT>::
oop_load_not_in_heap(T* addr) {
oop value = ModRef::oop_load_not_in_heap(addr);
- enqueue_if_weak(decorators, value);
+ enqueue_preloaded_if_weak(decorators, value);
return value;
}
@@ -83,7 +104,7 @@ template
inline oop G1BarrierSet::AccessBarrier<decorators, BarrierSetT>::
oop_load_in_heap(T* addr) {
oop value = ModRef::oop_load_in_heap(addr);
- enqueue_if_weak(decorators, value);
+ enqueue_preloaded_if_weak(decorators, value);
return value;
}
@@ -91,7 +112,7 @@ template
inline oop G1BarrierSet::AccessBarrier<decorators, BarrierSetT>::
oop_load_in_heap_at(oop base, ptrdiff_t offset) {
oop value = ModRef::oop_load_in_heap_at(base, offset);
- enqueue_if_weak(AccessBarrierSupport::resolve_possibly_unknown_oop_ref_strength(base, offset), value);
+ enqueue_preloaded_if_weak(AccessBarrierSupport::resolve_possibly_unknown_oop_ref_strength(base, offset), value);
return value;
}
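
The refactoring above gives G1's pre-write barrier an enqueue(T* dst) that first checks whether the SATB queue set is active and only then loads the old value, so the common marking-off case never reads the field. A simplified sketch of that shape (plain C++ stand-ins, not the G1 classes):

    // SATB-style pre-write barrier: skip the heap load entirely when marking is
    // not running; otherwise record the value about to be overwritten.
    #include <cstdio>
    #include <vector>

    struct SatbQueueSet {
      bool active = false;
      std::vector<const void*> queue;
      void enqueue_known_active(const void* old_val) { queue.push_back(old_val); }
    };

    static SatbQueueSet g_satb;

    static void write_ref_field_pre(const void** field) {
      if (!g_satb.active) return;          // fast path: no heap load at all
      const void* old_val = *field;        // load only while marking is active
      if (old_val != nullptr) g_satb.enqueue_known_active(old_val);
    }

    int main() {
      const void* slot = &g_satb;
      write_ref_field_pre(&slot);          // marking off: nothing enqueued
      g_satb.active = true;
      write_ref_field_pre(&slot);          // marking on: old value enqueued
      std::printf("enqueued %zu\n", g_satb.queue.size());
      return 0;
    }
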
diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp
index f70cb118627225f646cb43345c44b4a5a37cb085..058a9f58785dfd978e02ddd5dbab961159e75276 100644
--- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp
@@ -140,7 +140,7 @@ inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr(HeapWo
"start of block must be an initialized object");
n += block_size(q);
}
- assert(q <= n, "wrong order for q and addr");
+ assert(q <= addr, "wrong order for q and addr");
assert(addr < n, "wrong order for addr and n");
return q;
}
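
The assert fix above restores the intended invariant of the block walk: q is the start of the block containing addr and n is that block's end, so q <= addr < n. A toy version of the same loop over fixed-size "blocks":

    // Toy model of forward_to_block_containing_addr(): advance block by block
    // from a boundary at or before addr until the block [q, n) covers addr.
    #include <cassert>
    #include <cstdio>

    int main() {
      const int block_sizes[] = {16, 8, 32, 8};
      int q = 0, addr = 40;
      int n = q + block_sizes[0];
      int i = 0;
      while (n <= addr) {            // current block ends before addr: move on
        q = n;
        n += block_sizes[++i];
      }
      assert(q <= addr && addr < n); // the corrected invariant from the hunk above
      std::printf("block [%d, %d) contains %d\n", q, n, addr);
      return 0;
    }
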
diff --git a/src/hotspot/share/gc/g1/g1CardSet.cpp b/src/hotspot/share/gc/g1/g1CardSet.cpp
index a87db1ff71849645596a636daa06b2c005ca9794..82092a1d5020c9d17b5af25c947757365ce4e240 100644
--- a/src/hotspot/share/gc/g1/g1CardSet.cpp
+++ b/src/hotspot/share/gc/g1/g1CardSet.cpp
@@ -26,25 +26,19 @@
#include "gc/g1/g1CardSet.inline.hpp"
#include "gc/g1/g1CardSetContainers.inline.hpp"
#include "gc/g1/g1CardSetMemory.inline.hpp"
-#include "gc/g1/g1FromCardCache.hpp"
#include "gc/g1/heapRegion.inline.hpp"
+#include "gc/shared/gcLogPrecious.hpp"
+#include "gc/shared/gcTraceTime.inline.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/globals_extension.hpp"
-#include "runtime/mutex.hpp"
#include "utilities/bitMap.inline.hpp"
#include "utilities/concurrentHashTable.inline.hpp"
#include "utilities/globalDefinitions.hpp"
-#include "utilities/lockFreeStack.hpp"
-#include "utilities/spinYield.hpp"
-
-#include "gc/shared/gcLogPrecious.hpp"
-#include "gc/shared/gcTraceTime.inline.hpp"
-#include "runtime/java.hpp"
-G1CardSet::CardSetPtr G1CardSet::FullCardSet = (G1CardSet::CardSetPtr)-1;
+G1CardSet::ContainerPtr G1CardSet::FullCardSet = (G1CardSet::ContainerPtr)-1;
-static uint default_log2_card_region_per_region() {
+static uint default_log2_card_regions_per_region() {
uint log2_card_regions_per_heap_region = 0;
const uint card_container_limit = G1CardSetContainer::LogCardsPerRegionLimit;
@@ -62,7 +56,7 @@ G1CardSetConfiguration::G1CardSetConfiguration() :
G1RemSetHowlNumBuckets, /* num_buckets_in_howl */
(double)G1RemSetCoarsenHowlToFullPercent / 100, /* cards_in_howl_threshold_percent */
(uint)HeapRegion::CardsPerRegion, /* max_cards_in_cardset */
- default_log2_card_region_per_region()) /* log2_card_region_per_region */
+ default_log2_card_regions_per_region()) /* log2_card_regions_per_region */
{
assert((_log2_card_regions_per_heap_region + _log2_cards_per_card_region) == (uint)HeapRegion::LogCardsPerRegion,
"inconsistent heap region virtualization setup");
@@ -73,7 +67,7 @@ G1CardSetConfiguration::G1CardSetConfiguration(uint max_cards_in_array,
uint max_buckets_in_howl,
double cards_in_howl_threshold_percent,
uint max_cards_in_card_set,
- uint log2_card_region_per_region) :
+ uint log2_card_regions_per_region) :
G1CardSetConfiguration(log2i_exact(max_cards_in_card_set), /* inline_ptr_bits_per_card */
max_cards_in_array, /* max_cards_in_array */
cards_in_bitmap_threshold_percent, /* cards_in_bitmap_threshold_percent */
@@ -82,7 +76,7 @@ G1CardSetConfiguration::G1CardSetConfiguration(uint max_cards_in_array,
max_buckets_in_howl),
cards_in_howl_threshold_percent, /* cards_in_howl_threshold_percent */
max_cards_in_card_set, /* max_cards_in_cardset */
- log2_card_region_per_region)
+ log2_card_regions_per_region)
{ }
G1CardSetConfiguration::G1CardSetConfiguration(uint inline_ptr_bits_per_card,
@@ -197,7 +191,7 @@ void G1CardSetCoarsenStats::print_on(outputStream* out) {
}
class G1CardSetHashTable : public CHeapObj {
- using CardSetPtr = G1CardSet::CardSetPtr;
+ using ContainerPtr = G1CardSet::ContainerPtr;
// Did we insert at least one card in the table?
bool volatile _inserted_card;
@@ -231,12 +225,12 @@ class G1CardSetHashTable : public CHeapObj {
};
class G1CardSetHashTableScan : public StackObj {
- G1CardSet::CardSetPtrClosure* _scan_f;
+ G1CardSet::ContainerPtrClosure* _scan_f;
public:
- explicit G1CardSetHashTableScan(G1CardSet::CardSetPtrClosure* f) : _scan_f(f) { }
+ explicit G1CardSetHashTableScan(G1CardSet::ContainerPtrClosure* f) : _scan_f(f) { }
bool operator()(G1CardSetHashTableValue* value) {
- _scan_f->do_cardsetptr(value->_region_idx, value->_num_occupied, value->_card_set);
+ _scan_f->do_containerptr(value->_region_idx, value->_num_occupied, value->_container);
return true;
}
};
@@ -284,19 +278,19 @@ public:
return found.value();
}
- void iterate_safepoint(G1CardSet::CardSetPtrClosure* cl2) {
+ void iterate_safepoint(G1CardSet::ContainerPtrClosure* cl2) {
G1CardSetHashTableScan cl(cl2);
_table.do_safepoint_scan(cl);
}
- void iterate(G1CardSet::CardSetPtrClosure* cl2) {
+ void iterate(G1CardSet::ContainerPtrClosure* cl2) {
G1CardSetHashTableScan cl(cl2);
_table.do_scan(Thread::current(), cl);
}
void reset() {
if (Atomic::load(&_inserted_card)) {
- _table.unsafe_reset(InitialLogTableSize);
+ _table.unsafe_reset(InitialLogTableSize);
Atomic::store(&_inserted_card, false);
}
}
@@ -343,93 +337,93 @@ G1CardSet::~G1CardSet() {
_mm->flush();
}
-uint G1CardSet::card_set_type_to_mem_object_type(uintptr_t type) const {
- assert(type == G1CardSet::CardSetArrayOfCards ||
- type == G1CardSet::CardSetBitMap ||
- type == G1CardSet::CardSetHowl, "should not allocate card set type %zu", type);
+uint G1CardSet::container_type_to_mem_object_type(uintptr_t type) const {
+ assert(type == G1CardSet::ContainerArrayOfCards ||
+ type == G1CardSet::ContainerBitMap ||
+ type == G1CardSet::ContainerHowl, "should not allocate container type %zu", type);
return (uint)type;
}
uint8_t* G1CardSet::allocate_mem_object(uintptr_t type) {
- return _mm->allocate(card_set_type_to_mem_object_type(type));
+ return _mm->allocate(container_type_to_mem_object_type(type));
}
-void G1CardSet::free_mem_object(CardSetPtr card_set) {
- assert(card_set != G1CardSet::FreeCardSet, "should not free Free card set");
- assert(card_set != G1CardSet::FullCardSet, "should not free Full card set");
+void G1CardSet::free_mem_object(ContainerPtr container) {
+ assert(container != G1CardSet::FreeCardSet, "should not free container FreeCardSet");
+ assert(container != G1CardSet::FullCardSet, "should not free container FullCardSet");
- uintptr_t type = card_set_type(card_set);
- void* value = strip_card_set_type(card_set);
+ uintptr_t type = container_type(container);
+ void* value = strip_container_type(container);
- assert(type == G1CardSet::CardSetArrayOfCards ||
- type == G1CardSet::CardSetBitMap ||
- type == G1CardSet::CardSetHowl, "should not free card set type %zu", type);
+ assert(type == G1CardSet::ContainerArrayOfCards ||
+ type == G1CardSet::ContainerBitMap ||
+ type == G1CardSet::ContainerHowl, "should not free card set type %zu", type);
assert(static_cast<G1CardSetContainer*>(value)->refcount() == 1, "must be");
- _mm->free(card_set_type_to_mem_object_type(type), value);
+ _mm->free(container_type_to_mem_object_type(type), value);
}
-G1CardSet::CardSetPtr G1CardSet::acquire_card_set(CardSetPtr volatile* card_set_addr) {
+G1CardSet::ContainerPtr G1CardSet::acquire_container(ContainerPtr volatile* container_addr) {
// Update reference counts under RCU critical section to avoid a
// use-after-cleanup bug where we increment a reference count for
// an object whose memory has already been cleaned up and reused.
GlobalCounter::CriticalSection cs(Thread::current());
while (true) {
- // Get cardsetptr and increment refcount atomically wrt to memory reuse.
- CardSetPtr card_set = Atomic::load_acquire(card_set_addr);
- uint cs_type = card_set_type(card_set);
- if (card_set == FullCardSet || cs_type == CardSetInlinePtr) {
- return card_set;
+ // Get ContainerPtr and increment refcount atomically wrt to memory reuse.
+ ContainerPtr container = Atomic::load_acquire(container_addr);
+ uint cs_type = container_type(container);
+ if (container == FullCardSet || cs_type == ContainerInlinePtr) {
+ return container;
}
- G1CardSetContainer* card_set_on_heap = (G1CardSetContainer*)strip_card_set_type(card_set);
+ G1CardSetContainer* container_on_heap = (G1CardSetContainer*)strip_container_type(container);
- if (card_set_on_heap->try_increment_refcount()) {
- assert(card_set_on_heap->refcount() >= 3, "Smallest value is 3");
- return card_set;
+ if (container_on_heap->try_increment_refcount()) {
+ assert(container_on_heap->refcount() >= 3, "Smallest value is 3");
+ return container;
}
}
}
-bool G1CardSet::release_card_set(CardSetPtr card_set) {
- uint cs_type = card_set_type(card_set);
- if (card_set == FullCardSet || cs_type == CardSetInlinePtr) {
+bool G1CardSet::release_container(ContainerPtr container) {
+ uint cs_type = container_type(container);
+ if (container == FullCardSet || cs_type == ContainerInlinePtr) {
return false;
}
- G1CardSetContainer* card_set_on_heap = (G1CardSetContainer*)strip_card_set_type(card_set);
- return card_set_on_heap->decrement_refcount() == 1;
+ G1CardSetContainer* container_on_heap = (G1CardSetContainer*)strip_container_type(container);
+ return container_on_heap->decrement_refcount() == 1;
}
-void G1CardSet::release_and_maybe_free_card_set(CardSetPtr card_set) {
- if (release_card_set(card_set)) {
- free_mem_object(card_set);
+void G1CardSet::release_and_maybe_free_container(ContainerPtr container) {
+ if (release_container(container)) {
+ free_mem_object(container);
}
}
-void G1CardSet::release_and_must_free_card_set(CardSetPtr card_set) {
- bool should_free = release_card_set(card_set);
+void G1CardSet::release_and_must_free_container(ContainerPtr container) {
+ bool should_free = release_container(container);
assert(should_free, "should have been the only one having a reference");
- free_mem_object(card_set);
+ free_mem_object(container);
}
class G1ReleaseCardsets : public StackObj {
G1CardSet* _card_set;
- using CardSetPtr = G1CardSet::CardSetPtr;
+ using ContainerPtr = G1CardSet::ContainerPtr;
- void coarsen_to_full(CardSetPtr* card_set_addr) {
+ void coarsen_to_full(ContainerPtr* container_addr) {
while (true) {
- CardSetPtr cur_card_set = Atomic::load_acquire(card_set_addr);
- uint cs_type = G1CardSet::card_set_type(cur_card_set);
- if (cur_card_set == G1CardSet::FullCardSet) {
+ ContainerPtr cur_container = Atomic::load_acquire(container_addr);
+ uint cs_type = G1CardSet::container_type(cur_container);
+ if (cur_container == G1CardSet::FullCardSet) {
return;
}
- CardSetPtr old_value = Atomic::cmpxchg(card_set_addr, cur_card_set, G1CardSet::FullCardSet);
+ ContainerPtr old_value = Atomic::cmpxchg(container_addr, cur_container, G1CardSet::FullCardSet);
- if (old_value == cur_card_set) {
- _card_set->release_and_maybe_free_card_set(cur_card_set);
+ if (old_value == cur_container) {
+ _card_set->release_and_maybe_free_container(cur_container);
return;
}
}
@@ -438,51 +432,51 @@ class G1ReleaseCardsets : public StackObj {
public:
explicit G1ReleaseCardsets(G1CardSet* card_set) : _card_set(card_set) { }
- void operator ()(CardSetPtr* card_set_addr) {
- coarsen_to_full(card_set_addr);
+ void operator ()(ContainerPtr* container_addr) {
+ coarsen_to_full(container_addr);
}
};
-G1AddCardResult G1CardSet::add_to_array(CardSetPtr card_set, uint card_in_region) {
- G1CardSetArray* array = card_set_ptr<G1CardSetArray>(card_set);
+G1AddCardResult G1CardSet::add_to_array(ContainerPtr container, uint card_in_region) {
+ G1CardSetArray* array = container_ptr<G1CardSetArray>(container);
return array->add(card_in_region);
}
-G1AddCardResult G1CardSet::add_to_howl(CardSetPtr parent_card_set,
- uint card_region,
- uint card_in_region,
- bool increment_total) {
- G1CardSetHowl* howl = card_set_ptr<G1CardSetHowl>(parent_card_set);
+G1AddCardResult G1CardSet::add_to_howl(ContainerPtr parent_container,
+ uint card_region,
+ uint card_in_region,
+ bool increment_total) {
+ G1CardSetHowl* howl = container_ptr<G1CardSetHowl>(parent_container);
G1AddCardResult add_result;
- CardSetPtr to_transfer = nullptr;
- CardSetPtr card_set;
+ ContainerPtr to_transfer = nullptr;
+ ContainerPtr container;
uint bucket = _config->howl_bucket_index(card_in_region);
- volatile CardSetPtr* bucket_entry = howl->get_card_set_addr(bucket);
+ ContainerPtr volatile* bucket_entry = howl->get_container_addr(bucket);
while (true) {
if (Atomic::load(&howl->_num_entries) >= _config->cards_in_howl_threshold()) {
return Overflow;
}
- card_set = acquire_card_set(bucket_entry);
- add_result = add_to_card_set(bucket_entry, card_set, card_region, card_in_region);
+ container = acquire_container(bucket_entry);
+ add_result = add_to_container(bucket_entry, container, card_region, card_in_region);
if (add_result != Overflow) {
break;
}
- // Card set has overflown. Coarsen or retry.
- bool coarsened = coarsen_card_set(bucket_entry, card_set, card_in_region, true /* within_howl */);
- _coarsen_stats.record_coarsening(card_set_type(card_set) + G1CardSetCoarsenStats::CoarsenHowlOffset, !coarsened);
+ // Card set container has overflown. Coarsen or retry.
+ bool coarsened = coarsen_container(bucket_entry, container, card_in_region, true /* within_howl */);
+ _coarsen_stats.record_coarsening(container_type(container) + G1CardSetCoarsenStats::CoarsenHowlOffset, !coarsened);
if (coarsened) {
- // We have been the one coarsening this card set (and in the process added that card).
+ // We successfully coarsened this card set container (and in the process added the card).
add_result = Added;
- to_transfer = card_set;
+ to_transfer = container;
break;
}
// Somebody else beat us to coarsening. Retry.
- release_and_maybe_free_card_set(card_set);
+ release_and_maybe_free_container(container);
}
if (increment_total && add_result == Added) {
@@ -490,91 +484,91 @@ G1AddCardResult G1CardSet::add_to_howl(CardSetPtr parent_card_set,
}
if (to_transfer != nullptr) {
- transfer_cards_in_howl(parent_card_set, to_transfer, card_region);
+ transfer_cards_in_howl(parent_container, to_transfer, card_region);
}
- release_and_maybe_free_card_set(card_set);
+ release_and_maybe_free_container(container);
return add_result;
}
-G1AddCardResult G1CardSet::add_to_bitmap(CardSetPtr card_set, uint card_in_region) {
- G1CardSetBitMap* bitmap = card_set_ptr<G1CardSetBitMap>(card_set);
+G1AddCardResult G1CardSet::add_to_bitmap(ContainerPtr container, uint card_in_region) {
+ G1CardSetBitMap* bitmap = container_ptr<G1CardSetBitMap>(container);
uint card_offset = _config->howl_bitmap_offset(card_in_region);
return bitmap->add(card_offset, _config->cards_in_howl_bitmap_threshold(), _config->max_cards_in_howl_bitmap());
}
-G1AddCardResult G1CardSet::add_to_inline_ptr(CardSetPtr volatile* card_set_addr, CardSetPtr card_set, uint card_in_region) {
- G1CardSetInlinePtr value(card_set_addr, card_set);
+G1AddCardResult G1CardSet::add_to_inline_ptr(ContainerPtr volatile* container_addr, ContainerPtr container, uint card_in_region) {
+ G1CardSetInlinePtr value(container_addr, container);
return value.add(card_in_region, _config->inline_ptr_bits_per_card(), _config->max_cards_in_inline_ptr());
}
-G1CardSet::CardSetPtr G1CardSet::create_coarsened_array_of_cards(uint card_in_region, bool within_howl) {
+G1CardSet::ContainerPtr G1CardSet::create_coarsened_array_of_cards(uint card_in_region, bool within_howl) {
uint8_t* data = nullptr;
- CardSetPtr new_card_set;
+ ContainerPtr new_container;
if (within_howl) {
uint const size_in_bits = _config->max_cards_in_howl_bitmap();
- uint card_offset = _config->howl_bitmap_offset(card_in_region);
- data = allocate_mem_object(CardSetBitMap);
- new (data) G1CardSetBitMap(card_offset, size_in_bits);
- new_card_set = make_card_set_ptr(data, CardSetBitMap);
+ uint container_offset = _config->howl_bitmap_offset(card_in_region);
+ data = allocate_mem_object(ContainerBitMap);
+ new (data) G1CardSetBitMap(container_offset, size_in_bits);
+ new_container = make_container_ptr(data, ContainerBitMap);
} else {
- data = allocate_mem_object(CardSetHowl);
+ data = allocate_mem_object(ContainerHowl);
new (data) G1CardSetHowl(card_in_region, _config);
- new_card_set = make_card_set_ptr(data, CardSetHowl);
+ new_container = make_container_ptr(data, ContainerHowl);
}
- return new_card_set;
+ return new_container;
}
-bool G1CardSet::coarsen_card_set(volatile CardSetPtr* card_set_addr,
- CardSetPtr cur_card_set,
- uint card_in_region,
- bool within_howl) {
- CardSetPtr new_card_set = nullptr;
+bool G1CardSet::coarsen_container(ContainerPtr volatile* container_addr,
+ ContainerPtr cur_container,
+ uint card_in_region,
+ bool within_howl) {
+ ContainerPtr new_container = nullptr;
- switch (card_set_type(cur_card_set)) {
- case CardSetArrayOfCards : {
- new_card_set = create_coarsened_array_of_cards(card_in_region, within_howl);
+ switch (container_type(cur_container)) {
+ case ContainerArrayOfCards: {
+ new_container = create_coarsened_array_of_cards(card_in_region, within_howl);
break;
}
- case CardSetBitMap: {
- new_card_set = FullCardSet;
+ case ContainerBitMap: {
+ new_container = FullCardSet;
break;
}
- case CardSetInlinePtr: {
+ case ContainerInlinePtr: {
uint const size = _config->max_cards_in_array();
- uint8_t* data = allocate_mem_object(CardSetArrayOfCards);
+ uint8_t* data = allocate_mem_object(ContainerArrayOfCards);
new (data) G1CardSetArray(card_in_region, size);
- new_card_set = make_card_set_ptr(data, CardSetArrayOfCards);
+ new_container = make_container_ptr(data, ContainerArrayOfCards);
break;
}
- case CardSetHowl: {
- new_card_set = FullCardSet; // anything will do at this point.
+ case ContainerHowl: {
+ new_container = FullCardSet; // anything will do at this point.
break;
}
default:
ShouldNotReachHere();
}
- CardSetPtr old_value = Atomic::cmpxchg(card_set_addr, cur_card_set, new_card_set); // Memory order?
- if (old_value == cur_card_set) {
+ ContainerPtr old_value = Atomic::cmpxchg(container_addr, cur_container, new_container); // Memory order?
+ if (old_value == cur_container) {
// Success. Indicate that the cards from the current card set must be transferred
// by this caller.
// Release the hash table reference to the card. The caller still holds the
// reference to this card set, so it can never be released (and we do not need to
// check its result).
- bool should_free = release_card_set(cur_card_set);
+ bool should_free = release_container(cur_container);
assert(!should_free, "must have had more than one reference");
- // Free containers if cur_card_set is CardSetHowl
- if (card_set_type(cur_card_set) == CardSetHowl) {
+ // Free containers if cur_container is ContainerHowl
+ if (container_type(cur_container) == ContainerHowl) {
G1ReleaseCardsets rel(this);
- card_set_ptr(cur_card_set)->iterate(rel, _config->num_buckets_in_howl());
+ container_ptr(cur_container)->iterate(rel, _config->num_buckets_in_howl());
}
return true;
} else {
// Somebody else beat us to coarsening that card set. Exit, but clean up first.
- if (new_card_set != FullCardSet) {
- assert(new_card_set != nullptr, "must not be");
- release_and_must_free_card_set(new_card_set);
+ if (new_container != FullCardSet) {
+ assert(new_container != nullptr, "must not be");
+ release_and_must_free_container(new_container);
}
return false;
}
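
The coarsening path above follows a speculative-allocate-then-CAS pattern: each racing thread builds its replacement container up front, exactly one thread installs it, and the losers must free what they allocated. A minimal standalone sketch of that pattern, using plain std::atomic and a made-up Container type rather than the HotSpot types:

#include <atomic>
#include <cstdio>

struct Container { int level; };  // stand-in for the real card set containers

// Try to install 'coarser' in place of 'expected'. Exactly one racing thread
// wins the compare-and-exchange; losers discard their speculative allocation.
bool try_coarsen(std::atomic<Container*>& slot, Container* expected, Container* coarser) {
  Container* observed = expected;
  if (slot.compare_exchange_strong(observed, coarser)) {
    return true;   // winner; the real code defers freeing 'expected' until readers are done
  }
  delete coarser;  // lost the race: somebody else already coarsened
  return false;
}

int main() {
  std::atomic<Container*> slot{new Container{0}};
  Container* cur = slot.load();
  bool won = try_coarsen(slot, cur, new Container{1});
  std::printf("coarsened: %d, level: %d\n", won, slot.load()->level);
  if (won) delete cur;
  delete slot.load();
  return 0;
}
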
@@ -591,34 +585,34 @@ public:
}
};
-void G1CardSet::transfer_cards(G1CardSetHashTableValue* table_entry, CardSetPtr source_card_set, uint card_region) {
- assert(source_card_set != FullCardSet, "Should not need to transfer from full");
- // Need to transfer old entries unless there is a Full card set in place now, i.e.
- // the old type has been CardSetBitMap. "Full" contains all elements anyway.
- if (card_set_type(source_card_set) != CardSetHowl) {
+void G1CardSet::transfer_cards(G1CardSetHashTableValue* table_entry, ContainerPtr source_container, uint card_region) {
+ assert(source_container != FullCardSet, "Should not need to transfer from FullCardSet");
+ // Need to transfer old entries unless there is a Full card set container in place now, i.e.
+ // the old type has been ContainerBitMap. "Full" contains all elements anyway.
+ if (container_type(source_container) != ContainerHowl) {
G1TransferCard iter(this, card_region);
- iterate_cards_during_transfer(source_card_set, iter);
+ iterate_cards_during_transfer(source_container, iter);
} else {
- assert(card_set_type(source_card_set) == CardSetHowl, "must be");
+ assert(container_type(source_container) == ContainerHowl, "must be");
// Need to correct for that the Full remembered set occupies more cards than the
// AoCS before.
Atomic::add(&_num_occupied, _config->max_cards_in_region() - table_entry->_num_occupied, memory_order_relaxed);
}
}
-void G1CardSet::transfer_cards_in_howl(CardSetPtr parent_card_set,
- CardSetPtr source_card_set,
- uint card_region) {
- assert(card_set_type(parent_card_set) == CardSetHowl, "must be");
- assert(source_card_set != FullCardSet, "Should not need to transfer from full");
+void G1CardSet::transfer_cards_in_howl(ContainerPtr parent_container,
+ ContainerPtr source_container,
+ uint card_region) {
+ assert(container_type(parent_container) == ContainerHowl, "must be");
+ assert(source_container != FullCardSet, "Should not need to transfer from full");
// Need to transfer old entries unless there is a Full card set in place now, i.e.
- // the old type has been CardSetBitMap.
- if (card_set_type(source_card_set) != CardSetBitMap) {
- // We only need to transfer from anything below CardSetBitMap.
+ // the old type has been ContainerBitMap.
+ if (container_type(source_container) != ContainerBitMap) {
+ // We only need to transfer from anything below ContainerBitMap.
G1TransferCard iter(this, card_region);
- iterate_cards_during_transfer(source_card_set, iter);
+ iterate_cards_during_transfer(source_container, iter);
} else {
- uint diff = _config->max_cards_in_howl_bitmap() - card_set_ptr(source_card_set)->num_bits_set();
+ uint diff = _config->max_cards_in_howl_bitmap() - container_ptr(source_container)->num_bits_set();
// Need to correct for that the Full remembered set occupies more cards than the
// bitmap before.
@@ -627,10 +621,10 @@ void G1CardSet::transfer_cards_in_howl(CardSetPtr parent_card_set,
// G1CardSet::add_to_howl after coarsening.
diff -= 1;
- G1CardSetHowl* howling_array = card_set_ptr(parent_card_set);
+ G1CardSetHowl* howling_array = container_ptr(parent_container);
Atomic::add(&howling_array->_num_entries, diff, memory_order_relaxed);
- G1CardSetHashTableValue* table_entry = get_card_set(card_region);
+ G1CardSetHashTableValue* table_entry = get_container(card_region);
assert(table_entry != nullptr, "Table entry not found for transferred cards");
Atomic::add(&table_entry->_num_occupied, diff, memory_order_relaxed);
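
The diff computed in this branch is plain arithmetic: when a howl bucket's bitmap is replaced by Full, the bucket now covers every card, so occupancy grows by the cards the bitmap did not already hold, minus the one card that add_to_howl accounts for itself after coarsening. A tiny sketch with assumed configuration numbers (the values are illustrative, not real G1 defaults):

#include <cstdio>

int main() {
  // Assumed values for illustration only.
  unsigned max_cards_in_howl_bitmap = 1024;  // cards covered by one howl bucket
  unsigned num_bits_set             = 600;   // cards the bitmap held before coarsening

  unsigned diff = max_cards_in_howl_bitmap - num_bits_set;  // cards gained by going Full
  diff -= 1;  // the triggering card is counted separately by add_to_howl
  std::printf("occupancy correction: +%u\n", diff);         // prints 423
  return 0;
}
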
@@ -639,72 +633,75 @@ void G1CardSet::transfer_cards_in_howl(CardSetPtr parent_card_set,
}
}
-G1AddCardResult G1CardSet::add_to_card_set(volatile CardSetPtr* card_set_addr, CardSetPtr card_set, uint card_region, uint card_in_region, bool increment_total) {
- assert(card_set_addr != nullptr, "Cannot add to empty cardset");
+G1AddCardResult G1CardSet::add_to_container(ContainerPtr volatile* container_addr,
+ ContainerPtr container,
+ uint card_region,
+ uint card_in_region,
+ bool increment_total) {
+ assert(container_addr != nullptr, "must be");
G1AddCardResult add_result;
- switch (card_set_type(card_set)) {
- case CardSetInlinePtr: {
- add_result = add_to_inline_ptr(card_set_addr, card_set, card_in_region);
+ switch (container_type(container)) {
+ case ContainerInlinePtr: {
+ add_result = add_to_inline_ptr(container_addr, container, card_in_region);
break;
}
- case CardSetArrayOfCards : {
- add_result = add_to_array(card_set, card_in_region);
+ case ContainerArrayOfCards: {
+ add_result = add_to_array(container, card_in_region);
break;
}
- case CardSetBitMap: {
- add_result = add_to_bitmap(card_set, card_in_region);
+ case ContainerBitMap: {
+ add_result = add_to_bitmap(container, card_in_region);
break;
}
- case CardSetHowl: {
- assert(CardSetHowl == card_set_type(FullCardSet), "must be");
- if (card_set == FullCardSet) {
+ case ContainerHowl: {
+ assert(ContainerHowl == container_type(FullCardSet), "must be");
+ if (container == FullCardSet) {
return Found;
}
- add_result = add_to_howl(card_set, card_region, card_in_region, increment_total);
+ add_result = add_to_howl(container, card_region, card_in_region, increment_total);
break;
}
default:
ShouldNotReachHere();
}
-
return add_result;
}
-G1CardSetHashTableValue* G1CardSet::get_or_add_card_set(uint card_region, bool* should_grow_table) {
+G1CardSetHashTableValue* G1CardSet::get_or_add_container(uint card_region, bool* should_grow_table) {
return _table->get_or_add(card_region, should_grow_table);
}
-G1CardSetHashTableValue* G1CardSet::get_card_set(uint card_region) {
+G1CardSetHashTableValue* G1CardSet::get_container(uint card_region) {
return _table->get(card_region);
}
G1AddCardResult G1CardSet::add_card(uint card_region, uint card_in_region, bool increment_total) {
G1AddCardResult add_result;
- CardSetPtr to_transfer = nullptr;
- CardSetPtr card_set;
+ ContainerPtr to_transfer = nullptr;
+ ContainerPtr container;
bool should_grow_table = false;
- G1CardSetHashTableValue* table_entry = get_or_add_card_set(card_region, &should_grow_table);
+ G1CardSetHashTableValue* table_entry = get_or_add_container(card_region, &should_grow_table);
while (true) {
- card_set = acquire_card_set(&table_entry->_card_set);
- add_result = add_to_card_set(&table_entry->_card_set, card_set, card_region, card_in_region, increment_total);
+ container = acquire_container(&table_entry->_container);
+ add_result = add_to_container(&table_entry->_container, container, card_region, card_in_region, increment_total);
if (add_result != Overflow) {
break;
}
// Card set has overflown. Coarsen or retry.
- bool coarsened = coarsen_card_set(&table_entry->_card_set, card_set, card_in_region);
- _coarsen_stats.record_coarsening(card_set_type(card_set), !coarsened);
+ bool coarsened = coarsen_container(&table_entry->_container, container, card_in_region);
+ _coarsen_stats.record_coarsening(container_type(container), !coarsened);
if (coarsened) {
- // We have been the one coarsening this card set (and in the process added that card).
+ // We successfully coarsened this card set container (and in the process added the card).
add_result = Added;
- to_transfer = card_set;
+ to_transfer = container;
break;
}
// Somebody else beat us to coarsening. Retry.
- release_and_maybe_free_card_set(card_set);
+ release_and_maybe_free_container(container);
}
if (increment_total && add_result == Added) {
@@ -718,7 +715,7 @@ G1AddCardResult G1CardSet::add_card(uint card_region, uint card_in_region, bool
transfer_cards(table_entry, to_transfer, card_region);
}
- release_and_maybe_free_card_set(card_set);
+ release_and_maybe_free_container(container);
return add_result;
}
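
Taken together, add_card is a retry loop: read the current container, attempt the add, and on Overflow race to install a coarser container, after which the winner transfers the old cards. A single-threaded toy sketch of that control flow (ToyContainer and its doubling capacity are invented for illustration; the real code additionally reference-counts containers so concurrent readers stay safe):

#include <atomic>
#include <vector>

enum AddResult { Overflow, Found, Added };

struct ToyContainer {              // fixed capacity so Overflow can happen
  size_t capacity;
  std::vector<unsigned> cards;
  AddResult add(unsigned card) {
    for (unsigned c : cards) if (c == card) return Found;
    if (cards.size() >= capacity) return Overflow;
    cards.push_back(card);
    return Added;
  }
};

AddResult add_card(std::atomic<ToyContainer*>& slot, unsigned card) {
  while (true) {
    ToyContainer* cur = slot.load();
    AddResult res = cur->add(card);
    if (res != Overflow) return res;

    // Coarsen: build a larger container, transfer the old cards, add the new one.
    ToyContainer* coarser = new ToyContainer{cur->capacity * 2, cur->cards};
    coarser->cards.push_back(card);
    ToyContainer* expected = cur;
    if (slot.compare_exchange_strong(expected, coarser)) {
      delete cur;        // real code defers this until all references are released
      return Added;
    }
    delete coarser;      // somebody else coarsened first; retry against the new container
  }
}

int main() {
  std::atomic<ToyContainer*> slot{new ToyContainer{2, {}}};
  for (unsigned c = 0; c < 5; c++) add_card(slot, c);
  size_t n = slot.load()->cards.size();
  delete slot.load();
  return n == 5 ? 0 : 1;
}
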
@@ -727,29 +724,29 @@ bool G1CardSet::contains_card(uint card_region, uint card_in_region) {
assert(card_in_region < _config->max_cards_in_region(),
"Card %u is beyond max %u", card_in_region, _config->max_cards_in_region());
- // Protect the card set from reclamation.
+ // Protect the card set container from reclamation.
GlobalCounter::CriticalSection cs(Thread::current());
- G1CardSetHashTableValue* table_entry = get_card_set(card_region);
+ G1CardSetHashTableValue* table_entry = get_container(card_region);
if (table_entry == nullptr) {
return false;
}
- CardSetPtr card_set = table_entry->_card_set;
- if (card_set == FullCardSet) {
+ ContainerPtr container = table_entry->_container;
+ if (container == FullCardSet) {
// contains_card() is not a performance critical method so we do not hide that
// case in the switch below.
return true;
}
- switch (card_set_type(card_set)) {
- case CardSetInlinePtr: {
- G1CardSetInlinePtr ptr(card_set);
+ switch (container_type(container)) {
+ case ContainerInlinePtr: {
+ G1CardSetInlinePtr ptr(container);
return ptr.contains(card_in_region, _config->inline_ptr_bits_per_card());
}
- case CardSetArrayOfCards : return card_set_ptr(card_set)->contains(card_in_region);
- case CardSetBitMap: return card_set_ptr(card_set)->contains(card_in_region, _config->max_cards_in_howl_bitmap());
- case CardSetHowl: {
- G1CardSetHowl* howling_array = card_set_ptr(card_set);
+ case ContainerArrayOfCards: return container_ptr(container)->contains(card_in_region);
+ case ContainerBitMap: return container_ptr(container)->contains(card_in_region, _config->max_cards_in_howl_bitmap());
+ case ContainerHowl: {
+ G1CardSetHowl* howling_array = container_ptr(container);
return howling_array->contains(card_in_region, _config);
}
@@ -759,53 +756,53 @@ bool G1CardSet::contains_card(uint card_region, uint card_in_region) {
}
void G1CardSet::print_info(outputStream* st, uint card_region, uint card_in_region) {
- G1CardSetHashTableValue* table_entry = get_card_set(card_region);
+ G1CardSetHashTableValue* table_entry = get_container(card_region);
if (table_entry == nullptr) {
st->print("NULL card set");
return;
}
- CardSetPtr card_set = table_entry->_card_set;
- if (card_set == FullCardSet) {
+ ContainerPtr container = table_entry->_container;
+ if (container == FullCardSet) {
st->print("FULL card set)");
return;
}
- switch (card_set_type(card_set)) {
- case CardSetInlinePtr: {
+ switch (container_type(container)) {
+ case ContainerInlinePtr: {
st->print("InlinePtr not containing %u", card_in_region);
break;
}
- case CardSetArrayOfCards : {
+ case ContainerArrayOfCards: {
st->print("AoC not containing %u", card_in_region);
break;
}
- case CardSetBitMap: {
+ case ContainerBitMap: {
st->print("BitMap not containing %u", card_in_region);
break;
}
- case CardSetHowl: {
- st->print("CardSetHowl not containing %u", card_in_region);
+ case ContainerHowl: {
+ st->print("ContainerHowl not containing %u", card_in_region);
break;
}
- default: st->print("Unknown card set type %u", card_set_type(card_set)); ShouldNotReachHere(); break;
+ default: st->print("Unknown card set container type %u", container_type(container)); ShouldNotReachHere(); break;
}
}
template
-void G1CardSet::iterate_cards_during_transfer(CardSetPtr const card_set, CardVisitor& cl) {
- uint type = card_set_type(card_set);
- assert(type == CardSetInlinePtr || type == CardSetArrayOfCards,
+void G1CardSet::iterate_cards_during_transfer(ContainerPtr const container, CardVisitor& cl) {
+ uint type = container_type(container);
+ assert(type == ContainerInlinePtr || type == ContainerArrayOfCards,
"invalid card set type %d to transfer from",
- card_set_type(card_set));
+ container_type(container));
switch (type) {
- case CardSetInlinePtr: {
- G1CardSetInlinePtr ptr(card_set);
+ case ContainerInlinePtr: {
+ G1CardSetInlinePtr ptr(container);
ptr.iterate(cl, _config->inline_ptr_bits_per_card());
return;
}
- case CardSetArrayOfCards : {
- card_set_ptr(card_set)->iterate(cl);
+ case ContainerArrayOfCards: {
+ container_ptr(container)->iterate(cl);
return;
}
default:
@@ -813,7 +810,7 @@ void G1CardSet::iterate_cards_during_transfer(CardSetPtr const card_set, CardVis
}
}
-void G1CardSet::iterate_containers(CardSetPtrClosure* cl, bool at_safepoint) {
+void G1CardSet::iterate_containers(ContainerPtrClosure* cl, bool at_safepoint) {
if (at_safepoint) {
_table->iterate_safepoint(cl);
} else {
@@ -844,7 +841,7 @@ public:
};
template class CardOrRanges>
-class G1CardSetContainersClosure : public G1CardSet::CardSetPtrClosure {
+class G1CardSetContainersClosure : public G1CardSet::ContainerPtrClosure {
G1CardSet* _card_set;
Closure& _cl;
@@ -855,9 +852,9 @@ public:
_card_set(card_set),
_cl(cl) { }
- void do_cardsetptr(uint region_idx, size_t num_occupied, G1CardSet::CardSetPtr card_set) override {
+ void do_containerptr(uint region_idx, size_t num_occupied, G1CardSet::ContainerPtr container) override {
CardOrRanges cl(_cl, region_idx);
- _card_set->iterate_cards_or_ranges_in_container(card_set, cl);
+ _card_set->iterate_cards_or_ranges_in_container(container, cl);
}
};
@@ -879,13 +876,13 @@ size_t G1CardSet::occupied() const {
}
size_t G1CardSet::num_containers() {
- class GetNumberOfContainers : public CardSetPtrClosure {
+ class GetNumberOfContainers : public ContainerPtrClosure {
public:
size_t _count;
- GetNumberOfContainers() : CardSetPtrClosure(), _count(0) { }
+ GetNumberOfContainers() : ContainerPtrClosure(), _count(0) { }
- void do_cardsetptr(uint region_idx, size_t num_occupied, CardSetPtr card_set) override {
+ void do_containerptr(uint region_idx, size_t num_occupied, ContainerPtr container) override {
_count++;
}
} cl;
diff --git a/src/hotspot/share/gc/g1/g1CardSet.hpp b/src/hotspot/share/gc/g1/g1CardSet.hpp
index 465984d713873d76bb52e42928534b0c364eb39c..946d8cb73382b954ecf23d23124c909edc3e477f 100644
--- a/src/hotspot/share/gc/g1/g1CardSet.hpp
+++ b/src/hotspot/share/gc/g1/g1CardSet.hpp
@@ -26,10 +26,7 @@
#define SHARE_GC_G1_G1CARDSET_HPP
#include "memory/allocation.hpp"
-#include "memory/padded.hpp"
-#include "oops/oopsHierarchy.hpp"
#include "utilities/concurrentHashTable.hpp"
-#include "utilities/lockFreeStack.hpp"
class G1CardSetAllocOptions;
class G1CardSetHashTable;
@@ -147,10 +144,10 @@ public:
class G1CardSetCoarsenStats {
public:
// Number of entries in the statistics tables: since we index with the source
- // cardset of the coarsening, this is the total number of combinations of
- // card sets - 1.
+ // container of the coarsening, this is the total number of combinations of
+ // card set containers - 1.
static constexpr size_t NumCoarsenCategories = 7;
- // Coarsening statistics for the possible CardSetPtr in the Howl card set
+ // Coarsening statistics for the possible ContainerPtr in the Howl card set
// start from this offset.
static constexpr size_t CoarsenHowlOffset = 4;
@@ -173,14 +170,14 @@ public:
void print_on(outputStream* out);
};
-// Sparse set of card indexes comprising a remembered set on the Java heap. Card
+// Set of card indexes comprising a remembered set on the Java heap. Card
// size is assumed to be card table card size.
//
// Technically it is implemented using a ConcurrentHashTable that stores a card
// set container for every region containing at least one card.
//
// There are in total five different containers, encoded in the ConcurrentHashTable
-// node as CardSetPtr. A CardSetPtr may cover the whole region or just a part of
+// node as ContainerPtr. A ContainerPtr may cover the whole region or just a part of
// it.
// See its description below for more information.
class G1CardSet : public CHeapObj {
@@ -194,46 +191,46 @@ class G1CardSet : public CHeapObj {
static G1CardSetCoarsenStats _coarsen_stats; // Coarsening statistics since VM start.
static G1CardSetCoarsenStats _last_coarsen_stats; // Coarsening statistics at last GC.
public:
- // Two lower bits are used to encode the card storage types
- static const uintptr_t CardSetPtrHeaderSize = 2;
+ // Two lower bits are used to encode the card set container types
+ static const uintptr_t ContainerPtrHeaderSize = 2;
- // CardSetPtr represents the card storage type of a given covered area. It encodes
- // a type in the LSBs, in addition to having a few significant values.
+ // ContainerPtr represents the card set container type of a given covered area.
+ // It encodes a type in the LSBs, in addition to having a few significant values.
//
// Possible encodings:
//
// 0...00000 free (Empty, should never happen)
- // 1...11111 full All card indexes in the whole area this CardSetPtr covers are part of this container.
- // X...XXX00 inline-ptr-cards A handful of card indexes covered by this CardSetPtr are encoded within the CardSetPtr.
+ // 1...11111 full All card indexes in the whole area this ContainerPtr covers are part of this container.
+ // X...XXX00 inline-ptr-cards A handful of card indexes covered by this ContainerPtr are encoded within the ContainerPtr.
// X...XXX01 array of cards The container is a contiguous array of card indexes.
// X...XXX10 bitmap The container uses a bitmap to determine whether a given index is part of this set.
- // X...XXX11 howl This is a card set container containing an array of CardSetPtr, with each CardSetPtr
+ // X...XXX11 howl This is a card set container containing an array of ContainerPtr, with each ContainerPtr
// limited to a sub-range of the original range. Currently only one level of this
// container is supported.
- typedef void* CardSetPtr;
+ using ContainerPtr = void*;
// Coarsening happens in the order below:
- // CardSetInlinePtr -> CardSetArrayOfCards -> CardSetHowl -> Full
- // Corsening of containers inside the CardSetHowl happens in the order:
- // CardSetInlinePtr -> CardSetArrayOfCards -> CardSetBitMap -> Full
- static const uintptr_t CardSetInlinePtr = 0x0;
- static const uintptr_t CardSetArrayOfCards = 0x1;
- static const uintptr_t CardSetBitMap = 0x2;
- static const uintptr_t CardSetHowl = 0x3;
+ // ContainerInlinePtr -> ContainerArrayOfCards -> ContainerHowl -> Full
+ // Coarsening of containers inside the ContainerHowl happens in the order:
+ // ContainerInlinePtr -> ContainerArrayOfCards -> ContainerBitMap -> Full
+ static const uintptr_t ContainerInlinePtr = 0x0;
+ static const uintptr_t ContainerArrayOfCards = 0x1;
+ static const uintptr_t ContainerBitMap = 0x2;
+ static const uintptr_t ContainerHowl = 0x3;
// The special sentinel values
- static constexpr CardSetPtr FreeCardSet = nullptr;
- // Unfortunately we can't make (G1CardSet::CardSetPtr)-1 constexpr because
+ static constexpr ContainerPtr FreeCardSet = nullptr;
+ // Unfortunately we can't make (G1CardSet::ContainerPtr)-1 constexpr because
// reinterpret_casts are forbidden in constexprs. Use a regular static instead.
- static CardSetPtr FullCardSet;
+ static ContainerPtr FullCardSet;
- static const uintptr_t CardSetPtrTypeMask = ((uintptr_t)1 << CardSetPtrHeaderSize) - 1;
+ static const uintptr_t ContainerPtrTypeMask = ((uintptr_t)1 << ContainerPtrHeaderSize) - 1;
- static CardSetPtr strip_card_set_type(CardSetPtr ptr) { return (CardSetPtr)((uintptr_t)ptr & ~CardSetPtrTypeMask); }
+ static ContainerPtr strip_container_type(ContainerPtr ptr) { return (ContainerPtr)((uintptr_t)ptr & ~ContainerPtrTypeMask); }
- static uint card_set_type(CardSetPtr ptr) { return (uintptr_t)ptr & CardSetPtrTypeMask; }
+ static uint container_type(ContainerPtr ptr) { return (uintptr_t)ptr & ContainerPtrTypeMask; }
template
- static T* card_set_ptr(CardSetPtr ptr);
+ static T* container_ptr(ContainerPtr ptr);
private:
G1CardSetMemoryManager* _mm;
@@ -245,42 +242,42 @@ private:
// be (slightly) more cards in the card set than this value in reality.
size_t _num_occupied;
- CardSetPtr make_card_set_ptr(void* value, uintptr_t type);
+ ContainerPtr make_container_ptr(void* value, uintptr_t type);
- CardSetPtr acquire_card_set(CardSetPtr volatile* card_set_addr);
- // Returns true if the card set should be released
- bool release_card_set(CardSetPtr card_set);
+ ContainerPtr acquire_container(ContainerPtr volatile* container_addr);
+ // Returns true if the card set container should be released
+ bool release_container(ContainerPtr container);
// Release card set and free if needed.
- void release_and_maybe_free_card_set(CardSetPtr card_set);
+ void release_and_maybe_free_container(ContainerPtr container);
// Release card set and free (and it must be freeable).
- void release_and_must_free_card_set(CardSetPtr card_set);
+ void release_and_must_free_container(ContainerPtr container);
- // Coarsens the CardSet cur_card_set to the next level; tries to replace the
- // previous CardSet with a new one which includes the given card_in_region.
- // coarsen_card_set does not transfer cards from cur_card_set
- // to the new card_set. Transfer is achieved by transfer_cards.
- // Returns true if this was the thread that coarsened the CardSet (and added the card).
- bool coarsen_card_set(CardSetPtr volatile* card_set_addr,
- CardSetPtr cur_card_set,
- uint card_in_region, bool within_howl = false);
+ // Coarsens the card set container cur_container to the next level; tries to replace the
+ // previous ContainerPtr with a new one which includes the given card_in_region.
+ // coarsen_container does not transfer cards from cur_container
+ // to the new container. Transfer is achieved by transfer_cards.
+ // Returns true if this was the thread that coarsened the container (and added the card).
+ bool coarsen_container(ContainerPtr volatile* container_addr,
+ ContainerPtr cur_container,
+ uint card_in_region, bool within_howl = false);
- CardSetPtr create_coarsened_array_of_cards(uint card_in_region, bool within_howl);
+ ContainerPtr create_coarsened_array_of_cards(uint card_in_region, bool within_howl);
- // Transfer entries from source_card_set to a recently installed coarser storage type
+ // Transfer entries from source_container to a recently installed coarser storage type
- // We only need to transfer anything finer than CardSetBitMap. "Full" contains
+ // We only need to transfer anything finer than ContainerBitMap. "Full" contains
// all elements anyway.
- void transfer_cards(G1CardSetHashTableValue* table_entry, CardSetPtr source_card_set, uint card_region);
- void transfer_cards_in_howl(CardSetPtr parent_card_set, CardSetPtr source_card_set, uint card_region);
+ void transfer_cards(G1CardSetHashTableValue* table_entry, ContainerPtr source_container, uint card_region);
+ void transfer_cards_in_howl(ContainerPtr parent_container, ContainerPtr source_container, uint card_region);
- G1AddCardResult add_to_card_set(CardSetPtr volatile* card_set_addr, CardSetPtr card_set, uint card_region, uint card, bool increment_total = true);
+ G1AddCardResult add_to_container(ContainerPtr volatile* container_addr, ContainerPtr container, uint card_region, uint card, bool increment_total = true);
- G1AddCardResult add_to_inline_ptr(CardSetPtr volatile* card_set_addr, CardSetPtr card_set, uint card_in_region);
- G1AddCardResult add_to_array(CardSetPtr card_set, uint card_in_region);
- G1AddCardResult add_to_bitmap(CardSetPtr card_set, uint card_in_region);
- G1AddCardResult add_to_howl(CardSetPtr parent_card_set, uint card_region, uint card_in_region, bool increment_total = true);
+ G1AddCardResult add_to_inline_ptr(ContainerPtr volatile* container_addr, ContainerPtr container, uint card_in_region);
+ G1AddCardResult add_to_array(ContainerPtr container, uint card_in_region);
+ G1AddCardResult add_to_bitmap(ContainerPtr container, uint card_in_region);
+ G1AddCardResult add_to_howl(ContainerPtr parent_container, uint card_region, uint card_in_region, bool increment_total = true);
- G1CardSetHashTableValue* get_or_add_card_set(uint card_region, bool* should_grow_table);
- G1CardSetHashTableValue* get_card_set(uint card_region);
+ G1CardSetHashTableValue* get_or_add_container(uint card_region, bool* should_grow_table);
+ G1CardSetHashTableValue* get_container(uint card_region);
// Iterate over cards of a card set container during transfer of the cards from
// one container to another. Executes
@@ -289,11 +286,11 @@ private:
//
// on the given class.
template
- void iterate_cards_during_transfer(CardSetPtr const card_set, CardVisitor& vl);
+ void iterate_cards_during_transfer(ContainerPtr const container, CardVisitor& vl);
- uint card_set_type_to_mem_object_type(uintptr_t type) const;
+ uint container_type_to_mem_object_type(uintptr_t type) const;
uint8_t* allocate_mem_object(uintptr_t type);
- void free_mem_object(CardSetPtr card_set);
+ void free_mem_object(ContainerPtr container);
public:
G1CardSetConfiguration* config() const { return _config; }
@@ -302,8 +299,8 @@ public:
G1CardSet(G1CardSetConfiguration* config, G1CardSetMemoryManager* mm);
virtual ~G1CardSet();
- // Adds the given card to this set, returning an appropriate result. If added,
- // updates the total count.
+ // Adds the given card to this set, returning an appropriate result.
+ // If increment_total is true and the card has been added, updates the total count.
G1AddCardResult add_card(uint card_region, uint card_in_region, bool increment_total = true);
bool contains_card(uint card_region, uint card_in_region);
@@ -351,14 +348,14 @@ public:
// start_iterate().
//
template
- void iterate_cards_or_ranges_in_container(CardSetPtr const card_set, CardOrRangeVisitor& cl);
+ void iterate_cards_or_ranges_in_container(ContainerPtr const container, CardOrRangeVisitor& cl);
- class CardSetPtrClosure {
+ class ContainerPtrClosure {
public:
- virtual void do_cardsetptr(uint region_idx, size_t num_occupied, CardSetPtr card_set) = 0;
+ virtual void do_containerptr(uint region_idx, size_t num_occupied, ContainerPtr container) = 0;
};
- void iterate_containers(CardSetPtrClosure* cl, bool safepoint = false);
+ void iterate_containers(ContainerPtrClosure* cl, bool safepoint = false);
class CardClosure {
public:
@@ -370,13 +367,13 @@ public:
class G1CardSetHashTableValue {
public:
- using CardSetPtr = G1CardSet::CardSetPtr;
+ using ContainerPtr = G1CardSet::ContainerPtr;
const uint _region_idx;
uint volatile _num_occupied;
- CardSetPtr volatile _card_set;
+ ContainerPtr volatile _container;
- G1CardSetHashTableValue(uint region_idx, CardSetPtr card_set) : _region_idx(region_idx), _num_occupied(0), _card_set(card_set) { }
+ G1CardSetHashTableValue(uint region_idx, ContainerPtr container) : _region_idx(region_idx), _num_occupied(0), _container(container) { }
};
class G1CardSetHashTableConfig : public StackObj {
@@ -391,6 +388,6 @@ public:
static void free_node(void* context, void* memory, Value const& value);
};
-typedef ConcurrentHashTable CardSetHash;
+using CardSetHash = ConcurrentHashTable;
#endif // SHARE_GC_G1_G1CARDSET_HPP
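
The ContainerPtr encoding documented in the header above is ordinary low-bit pointer tagging: because containers are at least 4-byte aligned, the two least significant bits of the pointer are free to carry the container type. A standalone sketch of that idea (the helper names mirror make_container_ptr / container_type / strip_container_type but are illustrative, not the HotSpot declarations):

#include <cassert>
#include <cstdint>
#include <cstdio>

static const uintptr_t TypeBits = 2;
static const uintptr_t TypeMask = (uintptr_t(1) << TypeBits) - 1;

enum ContainerType : uintptr_t { InlinePtr = 0x0, ArrayOfCards = 0x1, BitMap = 0x2, Howl = 0x3 };

static void* make_tagged(void* p, uintptr_t type) {
  assert(((uintptr_t)p & TypeMask) == 0 && "pointer must be at least 4-byte aligned");
  return (void*)((uintptr_t)p | type);
}
static uintptr_t type_of(void* tagged)    { return (uintptr_t)tagged & TypeMask; }
static void*     strip_type(void* tagged) { return (void*)((uintptr_t)tagged & ~TypeMask); }

int main() {
  alignas(8) static int payload = 42;
  void* tagged = make_tagged(&payload, ArrayOfCards);
  std::printf("type=%u value=%d\n", (unsigned)type_of(tagged), *(int*)strip_type(tagged));
  return 0;
}
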
diff --git a/src/hotspot/share/gc/g1/g1CardSet.inline.hpp b/src/hotspot/share/gc/g1/g1CardSet.inline.hpp
index 99938b4b74eb55313e244ecfebe5b07806dd9c55..49d7928735a300f577aa4cdc6c85b30d843de5a5 100644
--- a/src/hotspot/share/gc/g1/g1CardSet.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1CardSet.inline.hpp
@@ -28,55 +28,54 @@
#include "gc/g1/g1CardSet.hpp"
#include "gc/g1/g1CardSetContainers.inline.hpp"
#include "gc/g1/g1GCPhaseTimes.hpp"
-#include "runtime/atomic.hpp"
#include "logging/log.hpp"
template
-inline T* G1CardSet::card_set_ptr(CardSetPtr ptr) {
- return (T*)strip_card_set_type(ptr);
+inline T* G1CardSet::container_ptr(ContainerPtr ptr) {
+ return (T*)strip_container_type(ptr);
}
-inline G1CardSet::CardSetPtr G1CardSet::make_card_set_ptr(void* value, uintptr_t type) {
- assert(card_set_type(value) == 0, "Given ptr " PTR_FORMAT " already has type bits set", p2i(value));
- return (CardSetPtr)((uintptr_t)value | type);
+inline G1CardSet::ContainerPtr G1CardSet::make_container_ptr(void* value, uintptr_t type) {
+ assert(container_type(value) == 0, "Given ptr " PTR_FORMAT " already has type bits set", p2i(value));
+ return (ContainerPtr)((uintptr_t)value | type);
}
template
-inline void G1CardSet::iterate_cards_or_ranges_in_container(CardSetPtr const card_set, CardOrRangeVisitor& cl) {
- switch (card_set_type(card_set)) {
- case CardSetInlinePtr: {
+inline void G1CardSet::iterate_cards_or_ranges_in_container(ContainerPtr const container, CardOrRangeVisitor& cl) {
+ switch (container_type(container)) {
+ case ContainerInlinePtr: {
if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedInline)) {
- G1CardSetInlinePtr ptr(card_set);
+ G1CardSetInlinePtr ptr(container);
ptr.iterate(cl, _config->inline_ptr_bits_per_card());
}
return;
}
- case CardSetArrayOfCards : {
+ case ContainerArrayOfCards: {
if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedArrayOfCards)) {
- card_set_ptr(card_set)->iterate(cl);
+ container_ptr(container)->iterate(cl);
}
return;
}
- case CardSetBitMap: {
+ case ContainerBitMap: {
// There is no first-level bitmap spanning the whole area.
ShouldNotReachHere();
return;
}
- case CardSetHowl: {
- assert(card_set_type(FullCardSet) == CardSetHowl, "Must be");
- if (card_set == FullCardSet) {
+ case ContainerHowl: {
+ assert(container_type(FullCardSet) == ContainerHowl, "Must be");
+ if (container == FullCardSet) {
if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedFull)) {
cl(0, _config->max_cards_in_region());
}
return;
}
if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedHowl)) {
- card_set_ptr(card_set)->iterate(cl, _config);
+ container_ptr(container)->iterate(cl, _config);
}
return;
}
}
- log_error(gc)("Unkown card set type %u", card_set_type(card_set));
+ log_error(gc)("Unkown card set container type %u", container_type(container));
ShouldNotReachHere();
}
diff --git a/src/hotspot/share/gc/g1/g1CardSetContainers.hpp b/src/hotspot/share/gc/g1/g1CardSetContainers.hpp
index 4f861baf4eb41915271345dc86a81421234ad9ce..453594da3f9e570d0ad7da90999cd5bb544c14e7 100644
--- a/src/hotspot/share/gc/g1/g1CardSetContainers.hpp
+++ b/src/hotspot/share/gc/g1/g1CardSetContainers.hpp
@@ -30,13 +30,8 @@
#include "runtime/atomic.hpp"
#include "utilities/bitMap.hpp"
#include "utilities/globalDefinitions.hpp"
-#include "utilities/spinYield.hpp"
-#include "logging/log.hpp"
-
-#include "runtime/thread.inline.hpp"
-
-// A helper class to encode a few card indexes within a CardSetPtr.
+// A helper class to encode a few card indexes within a ContainerPtr.
//
// The pointer value (either 32 or 64 bits) is split into two areas:
//
@@ -70,16 +65,16 @@
class G1CardSetInlinePtr : public StackObj {
friend class G1CardSetContainersTest;
- typedef G1CardSet::CardSetPtr CardSetPtr;
+ using ContainerPtr = G1CardSet::ContainerPtr;
- CardSetPtr volatile * _value_addr;
- CardSetPtr _value;
+ ContainerPtr volatile * _value_addr;
+ ContainerPtr _value;
static const uint SizeFieldLen = 3;
static const uint SizeFieldPos = 2;
- static const uint HeaderSize = G1CardSet::CardSetPtrHeaderSize + SizeFieldLen;
+ static const uint HeaderSize = G1CardSet::ContainerPtrHeaderSize + SizeFieldLen;
- static const uint BitsInValue = sizeof(CardSetPtr) * BitsPerByte;
+ static const uint BitsInValue = sizeof(ContainerPtr) * BitsPerByte;
static const uintptr_t SizeFieldMask = (((uint)1 << SizeFieldLen) - 1) << SizeFieldPos;
@@ -87,9 +82,9 @@ class G1CardSetInlinePtr : public StackObj {
return (idx * bits_per_card + HeaderSize);
}
- static CardSetPtr merge(CardSetPtr orig_value, uint card_in_region, uint idx, uint bits_per_card);
+ static ContainerPtr merge(ContainerPtr orig_value, uint card_in_region, uint idx, uint bits_per_card);
- static uint card_at(CardSetPtr value, uint const idx, uint const bits_per_card) {
+ static uint card_at(ContainerPtr value, uint const idx, uint const bits_per_card) {
uint8_t card_pos = card_pos_for(idx, bits_per_card);
uint result = ((uintptr_t)value >> card_pos) & (((uintptr_t)1 << bits_per_card) - 1);
return result;
@@ -98,14 +93,14 @@ class G1CardSetInlinePtr : public StackObj {
uint find(uint const card_idx, uint const bits_per_card, uint start_at, uint num_cards);
public:
- G1CardSetInlinePtr() : _value_addr(nullptr), _value((CardSetPtr)G1CardSet::CardSetInlinePtr) { }
+ G1CardSetInlinePtr() : _value_addr(nullptr), _value((ContainerPtr)G1CardSet::ContainerInlinePtr) { }
- G1CardSetInlinePtr(CardSetPtr value) : _value_addr(nullptr), _value(value) {
- assert(G1CardSet::card_set_type(_value) == G1CardSet::CardSetInlinePtr, "Value " PTR_FORMAT " is not a valid G1CardSetInPtr.", p2i(_value));
+ G1CardSetInlinePtr(ContainerPtr value) : _value_addr(nullptr), _value(value) {
+ assert(G1CardSet::container_type(_value) == G1CardSet::ContainerInlinePtr, "Value " PTR_FORMAT " is not a valid G1CardSetInlinePtr.", p2i(_value));
}
- G1CardSetInlinePtr(CardSetPtr volatile* value_addr, CardSetPtr value) : _value_addr(value_addr), _value(value) {
- assert(G1CardSet::card_set_type(_value) == G1CardSet::CardSetInlinePtr, "Value " PTR_FORMAT " is not a valid G1CardSetInPtr.", p2i(_value));
+ G1CardSetInlinePtr(ContainerPtr volatile* value_addr, ContainerPtr value) : _value_addr(value_addr), _value(value) {
+ assert(G1CardSet::container_type(_value) == G1CardSet::ContainerInlinePtr, "Value " PTR_FORMAT " is not a valid G1CardSetInlinePtr.", p2i(_value));
}
G1AddCardResult add(uint const card_idx, uint const bits_per_card, uint const max_cards_in_inline_ptr);
@@ -115,13 +110,13 @@ public:
template
void iterate(CardVisitor& found, uint const bits_per_card);
- operator CardSetPtr () { return _value; }
+ operator ContainerPtr () { return _value; }
static uint max_cards_in_inline_ptr(uint bits_per_card) {
return (BitsInValue - HeaderSize) / bits_per_card;
}
- static uint num_cards_in(CardSetPtr value) {
+ static uint num_cards_in(ContainerPtr value) {
return ((uintptr_t)value & SizeFieldMask) >> SizeFieldPos;
}
};
@@ -143,18 +138,12 @@ public:
// To maintain these constraints, live objects should have ((_ref_count & 0x1) == 1),
// which requires that we increment the reference counts by 2 starting at _ref_count = 3.
//
-// When such an object is on a free list, we reuse the same field for linking
-// together those free objects.
-//
// All but inline pointers are of this kind. For those, card entries are stored
-// directly in the CardSetPtr of the ConcurrentHashTable node.
+// directly in the ContainerPtr of the ConcurrentHashTable node.
class G1CardSetContainer {
-private:
- union {
- G1CardSetContainer* _next;
- uintptr_t _ref_count;
- };
-
+ uintptr_t _ref_count;
+protected:
+ ~G1CardSetContainer() = default;
public:
G1CardSetContainer() : _ref_count(3) { }
@@ -166,18 +155,6 @@ public:
// to check the value after attempting to decrement.
uintptr_t decrement_refcount();
- G1CardSetContainer* next() {
- return _next;
- }
-
- G1CardSetContainer** next_addr() {
- return &_next;
- }
-
- void set_next(G1CardSetContainer* next) {
- _next = next;
- }
-
// Log of largest card index that can be stored in any G1CardSetContainer
static uint LogCardsPerRegionLimit;
};
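
The reference counting scheme described in the comment above (live objects keep the low bit of _ref_count set, so counts start at 3 and move in steps of 2, one logical reference being +2) can be sketched standalone; the class and method names here are illustrative, not the G1CardSetContainer API:

#include <atomic>
#include <cassert>
#include <cstdint>

class RefCountedSketch {
  std::atomic<uintptr_t> _ref_count{3};   // low bit = "live", plus one initial reference (+2)
public:
  bool try_increment() {
    uintptr_t old = _ref_count.load();
    while (true) {
      if ((old & 1) == 0) return false;   // no longer live; do not resurrect
      if (_ref_count.compare_exchange_weak(old, old + 2)) return true;
    }
  }
  // Returns the new count; a result of 1 means the last reference is gone.
  uintptr_t decrement() { return _ref_count.fetch_sub(2) - 2; }
};

int main() {
  RefCountedSketch c;
  assert(c.try_increment());     // 3 -> 5
  assert(c.decrement() == 3);    // 5 -> 3
  assert(c.decrement() == 1);    // 3 -> 1: safe to reclaim
  return 0;
}
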
@@ -186,7 +163,7 @@ class G1CardSetArray : public G1CardSetContainer {
public:
typedef uint16_t EntryDataType;
typedef uint EntryCountType;
- using CardSetPtr = G1CardSet::CardSetPtr;
+ using ContainerPtr = G1CardSet::ContainerPtr;
private:
EntryCountType _size;
EntryCountType volatile _num_entries;
@@ -240,7 +217,7 @@ class G1CardSetBitMap : public G1CardSetContainer {
size_t _num_bits_set;
BitMap::bm_word_t _bits[1];
- using CardSetPtr = G1CardSet::CardSetPtr;
+ using ContainerPtr = G1CardSet::ContainerPtr;
template
static size_t header_size_in_bytes_internal() {
@@ -275,10 +252,10 @@ public:
class G1CardSetHowl : public G1CardSetContainer {
public:
typedef uint EntryCountType;
- using CardSetPtr = G1CardSet::CardSetPtr;
+ using ContainerPtr = G1CardSet::ContainerPtr;
EntryCountType volatile _num_entries;
private:
- CardSetPtr _buckets[2];
+ ContainerPtr _buckets[2];
// Do not add class member variables beyond this point
template
@@ -286,32 +263,32 @@ private:
return offset_of(Derived, _buckets);
}
- // Iterates over the given CardSetPtr with at index in this Howl card set,
+ // Iterates over the given ContainerPtr with at index in this Howl card set,
// applying a CardOrRangeVisitor on it.
template
- void iterate_cardset(CardSetPtr const card_set, uint index, CardOrRangeVisitor& found, G1CardSetConfiguration* config);
+ void iterate_cardset(ContainerPtr const container, uint index, CardOrRangeVisitor& found, G1CardSetConfiguration* config);
public:
G1CardSetHowl(EntryCountType card_in_region, G1CardSetConfiguration* config);
- CardSetPtr* get_card_set_addr(EntryCountType index) {
+ ContainerPtr* get_container_addr(EntryCountType index) {
return &_buckets[index];
}
bool contains(uint card_idx, G1CardSetConfiguration* config);
- // Iterates over all CardSetPtrs in this Howl card set, applying a CardOrRangeVisitor
+ // Iterates over all ContainerPtrs in this Howl card set, applying a CardOrRangeVisitor
// on it.
template
void iterate(CardOrRangeVisitor& found, G1CardSetConfiguration* config);
- // Iterates over all CardSetPtrs in this Howl card set. Calls
+ // Iterates over all ContainerPtrs in this Howl card set. Calls
//
- // void operator ()(CardSetPtr* card_set_addr);
+ // void operator ()(ContainerPtr* card_set_addr);
//
// on all of them.
- template
- void iterate(CardSetPtrVisitor& found, uint num_card_sets);
+ template
+ void iterate(ContainerPtrVisitor& found, uint num_card_sets);
static EntryCountType num_buckets(size_t size_in_bits, size_t num_cards_in_array, size_t max_buckets);
@@ -323,7 +300,7 @@ public:
static size_t header_size_in_bytes() { return header_size_in_bytes_internal(); }
static size_t size_in_bytes(size_t num_arrays) {
- return header_size_in_bytes() + sizeof(CardSetPtr) * num_arrays;
+ return header_size_in_bytes() + sizeof(ContainerPtr) * num_arrays;
}
};
diff --git a/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp b/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp
index 13e70302b023e97e4e8349327af639082de7f515..3949687a97c2fefabd10abefe252d284564844d4 100644
--- a/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp
@@ -29,8 +29,9 @@
#include "gc/g1/g1GCPhaseTimes.hpp"
#include "utilities/bitMap.inline.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/spinYield.hpp"
-inline G1CardSetInlinePtr::CardSetPtr G1CardSetInlinePtr::merge(CardSetPtr orig_value, uint card_in_region, uint idx, uint bits_per_card) {
+inline G1CardSetInlinePtr::ContainerPtr G1CardSetInlinePtr::merge(ContainerPtr orig_value, uint card_in_region, uint idx, uint bits_per_card) {
assert((idx & (SizeFieldMask >> SizeFieldPos)) == idx, "Index %u too large to fit into size field", idx);
assert(card_in_region < ((uint)1 << bits_per_card), "Card %u too large to fit into card value field", card_in_region);
@@ -43,7 +44,7 @@ inline G1CardSetInlinePtr::CardSetPtr G1CardSetInlinePtr::merge(CardSetPtr orig_
uintptr_t value = ((uintptr_t)(idx + 1) << SizeFieldPos) | ((uintptr_t)card_in_region << card_pos);
uintptr_t res = (((uintptr_t)orig_value & ~SizeFieldMask) | value);
- return (CardSetPtr)res;
+ return (ContainerPtr)res;
}
inline G1AddCardResult G1CardSetInlinePtr::add(uint card_idx, uint bits_per_card, uint max_cards_in_inline_ptr) {
@@ -63,8 +64,8 @@ inline G1AddCardResult G1CardSetInlinePtr::add(uint card_idx, uint bits_per_card
if (num_cards >= max_cards_in_inline_ptr) {
return Overflow;
}
- CardSetPtr new_value = merge(_value, card_idx, num_cards, bits_per_card);
- CardSetPtr old_value = Atomic::cmpxchg(_value_addr, _value, new_value, memory_order_relaxed);
+ ContainerPtr new_value = merge(_value, card_idx, num_cards, bits_per_card);
+ ContainerPtr old_value = Atomic::cmpxchg(_value_addr, _value, new_value, memory_order_relaxed);
if (_value == old_value) {
return Added;
}
@@ -72,7 +73,7 @@ inline G1AddCardResult G1CardSetInlinePtr::add(uint card_idx, uint bits_per_card
_value = old_value;
// The value of the pointer may have changed to something different than
// an inline card set. Exit then instead of overwriting.
- if (G1CardSet::card_set_type(_value) != G1CardSet::CardSetInlinePtr) {
+ if (G1CardSet::container_type(_value) != G1CardSet::ContainerInlinePtr) {
return Overflow;
}
}
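
merge and card_at above implement a small bit-field layout inside a single pointer-sized word: two type bits, a 3-bit count, and then the cards packed back to back. A standalone sketch with an assumed 10-bit card width (constants and helper names are illustrative, matching the layout described above but not the exact HotSpot code):

#include <cstdint>
#include <cstdio>

static const unsigned TypeBits   = 2;
static const unsigned SizeBits   = 3;
static const unsigned SizePos    = TypeBits;
static const unsigned HeaderBits = TypeBits + SizeBits;
static const uintptr_t SizeMask  = ((uintptr_t(1) << SizeBits) - 1) << SizePos;

static uintptr_t merge(uintptr_t value, unsigned card, unsigned idx, unsigned bits_per_card) {
  unsigned pos = HeaderBits + idx * bits_per_card;
  uintptr_t with_card = value | ((uintptr_t)card << pos);
  return (with_card & ~SizeMask) | ((uintptr_t)(idx + 1) << SizePos);  // bump stored count
}

static unsigned card_at(uintptr_t value, unsigned idx, unsigned bits_per_card) {
  unsigned pos = HeaderBits + idx * bits_per_card;
  return (value >> pos) & ((uintptr_t(1) << bits_per_card) - 1);
}

static unsigned num_cards(uintptr_t value) { return (value & SizeMask) >> SizePos; }

int main() {
  const unsigned bits_per_card = 10;           // assumed width, enough for cards < 1024
  uintptr_t v = 0;                             // type bits 00 == inline pointer
  v = merge(v, 17, 0, bits_per_card);
  v = merge(v, 400, 1, bits_per_card);
  std::printf("n=%u cards: %u %u\n", num_cards(v),
              card_at(v, 0, bits_per_card), card_at(v, 1, bits_per_card));
  return 0;
}
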
@@ -267,23 +268,23 @@ inline G1CardSetHowl::G1CardSetHowl(EntryCountType card_in_region, G1CardSetConf
inline bool G1CardSetHowl::contains(uint card_idx, G1CardSetConfiguration* config) {
EntryCountType bucket = config->howl_bucket_index(card_idx);
- CardSetPtr* array_entry = get_card_set_addr(bucket);
- CardSetPtr card_set = Atomic::load_acquire(array_entry);
+ ContainerPtr* array_entry = get_container_addr(bucket);
+ ContainerPtr container = Atomic::load_acquire(array_entry);
- switch (G1CardSet::card_set_type(card_set)) {
- case G1CardSet::CardSetArrayOfCards : {
- return G1CardSet::card_set_ptr(card_set)->contains(card_idx);
+ switch (G1CardSet::container_type(container)) {
+ case G1CardSet::ContainerArrayOfCards: {
+ return G1CardSet::container_ptr