diff -ur --new-file a/src/hotspot/cpu/sparc/abstractInterpreter_sparc.cpp b/src/hotspot/cpu/sparc/abstractInterpreter_sparc.cpp --- a/src/hotspot/cpu/sparc/abstractInterpreter_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/abstractInterpreter_sparc.cpp 2023-04-16 11:42:11.054534350 +0000 @@ -0,0 +1,297 @@ +/* + * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" +#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/arguments.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/align.hpp" +#include "utilities/macros.hpp" + + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); + return i; +} + +static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { + + // Figure out the size of an interpreter frame (in words) given that we have a fully allocated + // expression stack, the callee will have callee_extra_locals (so we can account for + // frame extension) and monitor_size for monitors. Basically we need to calculate + // this exactly like generate_fixed_frame/generate_compute_interpreter_state. + // + // + // The big complicating thing here is that we must ensure that the stack stays properly + // aligned. (This would be even uglier if monitor size wasn't a multiple of what the stack + // needs to be aligned for.) We are given that the sp (fp) is already aligned by + // the caller so we must ensure that it is properly aligned for our callee. + // + const int rounded_vm_local_words = + align_up((int)frame::interpreter_frame_vm_local_words,WordsPerLong); + // callee_extra_locals and max_stack are counts, not the size in frame. 
+ const int locals_size = + align_up(callee_extra_locals * Interpreter::stackElementWords, WordsPerLong); + const int max_stack_words = max_stack * Interpreter::stackElementWords; + return (align_up((max_stack_words + + rounded_vm_local_words + + frame::memory_parameter_word_sp_offset), WordsPerLong) + // already rounded + + locals_size + monitor_size); +} + +// How much stack a method's top interpreter activation needs, in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + + // See call_stub code + int call_stub_size = align_up(7 + frame::memory_parameter_word_sp_offset, + WordsPerLong); // 7 + register save area + + // Save space for one monitor to get into the interpreted method in case + // the method is synchronized + int monitor_size = method->is_synchronized() ? + 1*frame::interpreter_frame_monitor_size() : 0; + return size_activation_helper(method->max_locals(), method->max_stack(), + monitor_size) + call_stub_size; +} + +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in TemplateInterpreterGenerator::generate_fixed_frame. + + int monitor_size = monitors * frame::interpreter_frame_monitor_size(); + + assert(is_aligned(monitor_size, WordsPerLong), "must align"); + + // + // Note: if you look closely this appears to be doing something much different + // than generate_fixed_frame. What is happening is this. On sparc we have to do + // this dance with interpreter_sp_adjustment because the window save area would + // appear just below the bottom (tos) of the caller's java expression stack. Because + // the interpreter wants to have the locals completely contiguous, generate_fixed_frame + // will adjust the caller's sp for the "extra locals" (max_locals - parameter_size). 
+ // Now in generate_fixed_frame the extension of the caller's sp happens in the callee. + // In this code the opposite occurs: the caller adjusts its own stack based on the callee. + // This is mostly ok but it does cause a problem when we get to the initial frame (the oldest) + // because the oldest frame would have adjusted its caller's frame and yet that frame + // already exists and isn't part of this array of frames we are unpacking. So at first + // glance this would seem to mess up that frame. However, Deoptimization::fetch_unroll_info_helper(), + // after it calculates all of the frames' on_stack_size()s, will then figure out the + // amount to adjust the caller of the initial (oldest) frame and the calculation will all + // add up. It does seem like it would be simpler to account for the adjustment here (and remove the + // callee... parameters here). However this would mean that this routine would have to take + // the caller frame as input so we could adjust its sp (and set its interpreter_sp_adjustment) + // and run the calling loop in the reverse order. This would also appear to mean making + // this code aware of what the interactions are when that initial caller frame was an osr or + // other adapter frame. Deoptimization is complicated enough and hard enough to debug that + // there is no sense in messing with working code. 
+ // + + int rounded_cls = align_up((callee_locals - callee_params), WordsPerLong); + assert(is_aligned(rounded_cls, WordsPerLong), "must align"); + + int raw_frame_size = size_activation_helper(rounded_cls, max_stack, monitor_size); + + return raw_frame_size; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_local_count, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Set up the following variables: + // - Lmethod + // - Llocals + // - Lmonitors (to the indicated number of monitors) + // - Lesp (to the indicated number of temps) + // The frame caller on entry is a description of the caller of the + // frame we are about to lay out. We are guaranteed that we will be + // able to fill in a new interpreter frame as its callee (i.e. the + // stack space is allocated and the amount was determined by an + // earlier call to the size_activation() method). On return caller + // will describe the interpreter frame we just laid out. + + // The skeleton frame must already look like an interpreter frame + // even if not fully filled out. + assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame"); + + int rounded_vm_local_words = align_up((int)frame::interpreter_frame_vm_local_words,WordsPerLong); + int monitor_size = moncount * frame::interpreter_frame_monitor_size(); + assert(is_aligned(monitor_size, WordsPerLong), "must align"); + + intptr_t* fp = interpreter_frame->fp(); + + JavaThread* thread = JavaThread::current(); + RegisterMap map(thread, false); + // More verification that skeleton frame is properly walkable + assert(fp == caller->sp(), "fp must match"); + + intptr_t* montop = fp - rounded_vm_local_words; + + // preallocate monitors (cf. 
__ add_monitor_to_stack) + intptr_t* monitors = montop - monitor_size; + + // preallocate stack space + intptr_t* esp = monitors - 1 - + (tempcount * Interpreter::stackElementWords) - + popframe_extra_args; + + int local_words = method->max_locals() * Interpreter::stackElementWords; + NEEDS_CLEANUP; + intptr_t* locals; + if (caller->is_interpreted_frame()) { + // Can force the locals area to end up properly overlapping the top of the expression stack. + intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1; + // Note that this computation means we replace size_of_parameters() values from the caller + // interpreter frame's expression stack with our argument locals + int parm_words = caller_actual_parameters * Interpreter::stackElementWords; + locals = Lesp_ptr + parm_words; + int delta = local_words - parm_words; + int computed_sp_adjustment = (delta > 0) ? align_up(delta, WordsPerLong) : 0; + *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS; + if (!is_bottom_frame) { + // Llast_SP is set below for the current frame to SP (with the + // extra space for the callee's locals). Here we adjust + // Llast_SP for the caller's frame, removing the extra space + // for the current method's locals. + *caller->register_addr(Llast_SP) = *interpreter_frame->register_addr(I5_savedSP); + } else { + assert(*caller->register_addr(Llast_SP) >= *interpreter_frame->register_addr(I5_savedSP), "strange Llast_SP"); + } + } else { + assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases"); + // Don't have Lesp available; lay out locals block in the caller + // adjacent to the register window save area. + // + // Compiled frames do not allocate a varargs area which is why this if + // statement is needed. 
+ // + if (caller->is_compiled_frame()) { + locals = fp + frame::register_save_words + local_words - 1; + } else { + locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1; + } + if (!caller->is_entry_frame()) { + // Caller wants his own SP back + int caller_frame_size = caller->cb()->frame_size(); + *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS; + } + } + if (TraceDeoptimization) { + if (caller->is_entry_frame()) { + // make sure I5_savedSP and the entry frame's notion of saved SP + // agree. This assertion duplicates a check in entry frame code + // but catches the failure earlier. + assert(*caller->register_addr(Lscratch) == *interpreter_frame->register_addr(I5_savedSP), + "would change callers SP"); + } + if (caller->is_entry_frame()) { + tty->print("entry "); + } + if (caller->is_compiled_frame()) { + tty->print("compiled "); + if (caller->is_deoptimized_frame()) { + tty->print("(deopt) "); + } + } + if (caller->is_interpreted_frame()) { + tty->print("interpreted "); + } + tty->print_cr("caller fp=" INTPTR_FORMAT " sp=" INTPTR_FORMAT, p2i(caller->fp()), p2i(caller->sp())); + tty->print_cr("save area = " INTPTR_FORMAT ", " INTPTR_FORMAT, p2i(caller->sp()), p2i(caller->sp() + 16)); + tty->print_cr("save area = " INTPTR_FORMAT ", " INTPTR_FORMAT, p2i(caller->fp()), p2i(caller->fp() + 16)); + tty->print_cr("interpreter fp=" INTPTR_FORMAT ", " INTPTR_FORMAT, p2i(interpreter_frame->fp()), p2i(interpreter_frame->sp())); + tty->print_cr("save area = " INTPTR_FORMAT ", " INTPTR_FORMAT, p2i(interpreter_frame->sp()), p2i(interpreter_frame->sp() + 16)); + tty->print_cr("save area = " INTPTR_FORMAT ", " INTPTR_FORMAT, p2i(interpreter_frame->fp()), p2i(interpreter_frame->fp() + 16)); + tty->print_cr("Llocals = " INTPTR_FORMAT, p2i(locals)); + tty->print_cr("Lesp = " INTPTR_FORMAT, p2i(esp)); + tty->print_cr("Lmonitors = " INTPTR_FORMAT, p2i(monitors)); + } + + if (method->max_locals() > 0) { + 
assert(locals < caller->sp() || locals >= (caller->sp() + 16), "locals in save area"); + assert(locals < caller->fp() || locals > (caller->fp() + 16), "locals in save area"); + assert(locals < interpreter_frame->sp() || locals > (interpreter_frame->sp() + 16), "locals in save area"); + assert(locals < interpreter_frame->fp() || locals >= (interpreter_frame->fp() + 16), "locals in save area"); + } + assert(*interpreter_frame->register_addr(I5_savedSP) & 1, "must be odd"); + + *interpreter_frame->register_addr(Lmethod) = (intptr_t) method; + *interpreter_frame->register_addr(Llocals) = (intptr_t) locals; + *interpreter_frame->register_addr(Lmonitors) = (intptr_t) monitors; + *interpreter_frame->register_addr(Lesp) = (intptr_t) esp; + // Llast_SP will be same as SP as there is no adapter space + *interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS; + *interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache(); + // save the mirror in the interpreter frame + *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); + +#ifdef ASSERT + BasicObjectLock* mp = (BasicObjectLock*)monitors; + + assert(interpreter_frame->interpreter_frame_method() == method, "method matches"); + assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match"); + assert(interpreter_frame->interpreter_frame_monitor_end() == mp, "monitor_end matches"); + assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches"); + assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches"); + + // check bounds + intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1); + intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words; + assert(lo < monitors && montop <= hi, "monitors in bounds"); + 
assert(lo <= esp && esp < monitors, "esp in bounds"); +#endif // ASSERT +} diff -ur --new-file a/src/hotspot/cpu/sparc/args.cc b/src/hotspot/cpu/sparc/args.cc --- a/src/hotspot/cpu/sparc/args.cc 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/args.cc 2023-04-16 11:42:11.054741725 +0000 @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2002, 2006, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include +#include + +static const int R_O0_num = 1000; +static const int R_I0_num = 2000; +static const int R_F0_num = 3000; +static const int R_F1_num = R_F0_num + 1; +static const int R_F2_num = R_F0_num + 2; +static const int STACK_num= 4000; + +static bool LP64 = false; +static bool LONGS_IN_ONE_ENTRY = false; + +static const int Op_RegI = 'I'; +static const int Op_RegP = 'P'; +static const int Op_RegF = 'F'; +static const int Op_RegD = 'D'; +static const int Op_RegL = 'L'; +static const int SPARC_ARGS_IN_REGS_NUM=6; + +static void print_reg( int reg ) { + if( reg == 0 ) + printf("__"); // halve's + else if( reg >= STACK_num && reg < STACK_num+100 ) + printf("S%d_",reg - STACK_num); + else if( reg >= R_F0_num && reg < R_F0_num+100 ) + printf("F%d_",reg - R_F0_num); + else if( reg >= R_O0_num && reg < R_O0_num+100 ) { + if( LONGS_IN_ONE_ENTRY ) { + reg -= R_O0_num; + printf("O%d",reg>>1); + printf(reg&1 ? "H" : "L"); + } else + printf("O%d_",reg - R_O0_num); + } else + printf("Wretched: %d\n", reg); +} + +static void print_convention( int *sig, const char *s, int length ) { + // Print it out + for( int i = 0; i < length; i++) { + if( sig[i] == 0 ) continue; // do not print 'halves' + print_reg( sig[i] & 0xFFFF ); + int reg = sig[i] >> 16; + if( reg ) { + printf(":"); + print_reg( reg ); + } else { + printf(" "); + } + printf(" "); + } + printf("\n"); +} + +static int INT_SCALE( int x ) { + return LONGS_IN_ONE_ENTRY ? (x<<1) : x; +} + +static void java_convention( int *sig, const char *s, int length ) { + if( LP64 && !LONGS_IN_ONE_ENTRY ) { + printf("LP64 and 2-reg longs not supported\n"); + return; + } + for( int i = 0; i < length; i++ ) + sig[i] = s[i]; // Reset sig array + bool is_outgoing = true; + + int int_base = (is_outgoing ? R_O0_num : R_I0_num); + + // Convention is to pack the first 6 int/oop args into the first 6 + // registers (I0-I5), extras spill to the stack. 
Then pack the first + // 32 float args into F0-F31, extras spill to the stack. Then pad + // all register sets to align. Then put longs and doubles into the + // same registers as they fit, else spill to the stack. + int int_reg_max = SPARC_ARGS_IN_REGS_NUM; + int flt_reg_max = 32; + + // Count int/oop and float args. See how many stack slots we'll need + // and where the longs & doubles will go. + int int_reg_cnt = 0; + int flt_reg_cnt = 0; + int stk_reg_pairs = 0; + for( int i = 0; i < length; i++) { + switch( sig[i] ) { + case Op_RegL: // Longs-in-1-reg compete with int args + if( LONGS_IN_ONE_ENTRY ) { + if( int_reg_cnt < int_reg_max ) int_reg_cnt++; + } + break; + case Op_RegP: + if( int_reg_cnt < int_reg_max ) int_reg_cnt++; + else if( !LP64 ) stk_reg_pairs++; + break; + case Op_RegI: + if( int_reg_cnt < int_reg_max ) int_reg_cnt++; + else stk_reg_pairs++; + break; + case Op_RegF: + if( flt_reg_cnt < flt_reg_max ) flt_reg_cnt++; + else stk_reg_pairs++; + break; + } + } + + // This is where the longs/doubles start on the stack. + stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round + + int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only + int flt_reg_pairs = (flt_reg_cnt+1) & ~1; + + int stk_reg = 0; + int int_reg = 0; + int flt_reg = 0; + + // Now do the signature layout + for( int i = 0; i < length; i++) { + int tmp = sig[i]; + if( tmp == Op_RegP ) + tmp = LP64 ? 
Op_RegL : Op_RegI; // Treat ptrs and ints or long accordingly + switch( tmp ) { + case Op_RegI: +// case Op_RegP: + if( int_reg < int_reg_max) tmp = INT_SCALE(int_reg++) + int_base; + else tmp = STACK_num + stk_reg++; + sig[i] = tmp; + break; + + case Op_RegL: + if( sig[i] != Op_RegP && sig[i+1] != 'h' ) { printf("expecting (h)alf, found %c\n", sig[i+1]); return; } +// case Op_RegP: + if( LONGS_IN_ONE_ENTRY ) { + if( int_reg < int_reg_max ) { + tmp = INT_SCALE(int_reg++) + int_base; + } else { + tmp = STACK_num + stk_reg_pairs; + stk_reg_pairs += 2; + } + } else { + if( int_reg_pairs < int_reg_max ) { + tmp = int_reg_pairs + int_base; + int_reg_pairs += 2; + } else { + tmp = STACK_num + stk_reg_pairs; + stk_reg_pairs += 2; + } + } + sig[i] = tmp | (tmp+1)<<16; // Smear to pair + break; + + case Op_RegF: + sig[i] = (flt_reg < flt_reg_max) ? (R_F0_num + flt_reg++) : STACK_num + stk_reg++; + break; + case Op_RegD: + if( sig[i+1] != 'h' ) { printf("expecting (h)alf, found %c\n", sig[i+1]); return; } + if( flt_reg_pairs < flt_reg_max ) { + tmp = R_F0_num + flt_reg_pairs; + flt_reg_pairs += 2; + } else { + tmp = STACK_num + stk_reg_pairs; + stk_reg_pairs += 2; + } + sig[i] = tmp | (tmp+1)<<16; // Smear to pair + break; + case 'h': sig[i] = 0; break; + default: + printf("Bad character: %c\n", sig[i] ); + return; + } + } + + printf("java "); + printf(LP64 ? "LP64 " : "LP32 "); + printf(LONGS_IN_ONE_ENTRY ? "long1: " : "long2: "); + print_convention(sig,s,length); +} + +static int int_stk_helper( int i ) { + if( i < 6 ) return R_O0_num + (LONGS_IN_ONE_ENTRY ? i<<1 : i); + else return STACK_num + (LP64 ? 
i<<1 : i); +} + +static void native_convention( int *sig, const char *s, int length ) { + if( LP64 && !LONGS_IN_ONE_ENTRY ) { + printf("LP64 and 2-reg longs not supported\n"); + return; + } + for( int i = 0; i < length; i++ ) + sig[i] = s[i]; // Reset sig array + + // The native convention is V8 if !LP64, which means the V8 convention is + // used both with and without LONGS_IN_ONE_ENTRY, an unfortunate split. The + // same actual machine registers are used, but they are named differently in + // the LONGS_IN_ONE_ENTRY mode. The LP64 convention is the V9 convention + // which is slightly more sane. + + if( LP64 ) { + // V9 convention: All things "as-if" on double-wide stack slots. + // Hoist any int/ptr/long's in the first 6 to int regs. + // Hoist any flt/dbl's in the first 16 dbl regs. + int j = 0; // Count of actual args, not HALVES + for( int i=0; istart(); + address addrN = csect->end(); + uint32_t prev = 0; + + assert((addrN - addr0) % BytesPerInstWord == 0, "must be"); + + for (address pc = addr0; pc != addrN; pc += BytesPerInstWord) { + uint32_t insn = *reinterpret_cast(pc); + + // 1. General case: No CTI immediately after other CTI + assert(!(is_cti(prev) && is_cti(insn)), "CTI-CTI not allowed."); + + // 2. Special case: No CTI immediately after/before RDPC + assert(!(is_cti(prev) && is_rdpc(insn)), "CTI-RDPC not allowed."); + assert(!(is_rdpc(prev) && is_cti(insn)), "RDPC-CTI not allowed."); + + prev = insn; + } +} +#endif diff -ur --new-file a/src/hotspot/cpu/sparc/assembler_sparc.hpp b/src/hotspot/cpu/sparc/assembler_sparc.hpp --- a/src/hotspot/cpu/sparc/assembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/assembler_sparc.hpp 2023-04-16 11:42:11.055419794 +0000 @@ -0,0 +1,1340 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_ASSEMBLER_SPARC_HPP +#define CPU_SPARC_ASSEMBLER_SPARC_HPP + +#include "asm/register.hpp" +#include "runtime/vm_version.hpp" + +// The SPARC Assembler: Pure assembler doing NO optimizations on the instruction +// level; i.e., what you write is what you get. The Assembler is generating code +// into a CodeBuffer. 
+ +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; + friend class AddressLiteral; + + // code patchers need various routines like inv_wdisp() + friend class NativeInstruction; + friend class NativeGeneralJump; + friend class Relocation; + friend class Label; + + public: + // op carries format info; see page 62 & 267 + + enum ops { + call_op = 1, // fmt 1 + branch_op = 0, // also sethi (fmt2) + arith_op = 2, // fmt 3, arith & misc + ldst_op = 3 // fmt 3, load/store + }; + + enum op2s { + bpr_op2 = 3, + fb_op2 = 6, + fbp_op2 = 5, + br_op2 = 2, + bp_op2 = 1, + sethi_op2 = 4 + }; + + enum op3s { + // selected op3s + add_op3 = 0x00, + and_op3 = 0x01, + or_op3 = 0x02, + xor_op3 = 0x03, + sub_op3 = 0x04, + andn_op3 = 0x05, + orn_op3 = 0x06, + xnor_op3 = 0x07, + addc_op3 = 0x08, + mulx_op3 = 0x09, + umul_op3 = 0x0a, + smul_op3 = 0x0b, + subc_op3 = 0x0c, + udivx_op3 = 0x0d, + udiv_op3 = 0x0e, + sdiv_op3 = 0x0f, + + addcc_op3 = 0x10, + andcc_op3 = 0x11, + orcc_op3 = 0x12, + xorcc_op3 = 0x13, + subcc_op3 = 0x14, + andncc_op3 = 0x15, + orncc_op3 = 0x16, + xnorcc_op3 = 0x17, + addccc_op3 = 0x18, + aes4_op3 = 0x19, + umulcc_op3 = 0x1a, + smulcc_op3 = 0x1b, + subccc_op3 = 0x1c, + udivcc_op3 = 0x1e, + sdivcc_op3 = 0x1f, + + taddcc_op3 = 0x20, + tsubcc_op3 = 0x21, + taddcctv_op3 = 0x22, + tsubcctv_op3 = 0x23, + mulscc_op3 = 0x24, + sll_op3 = 0x25, + sllx_op3 = 0x25, + srl_op3 = 0x26, + srlx_op3 = 0x26, + sra_op3 = 0x27, + srax_op3 = 0x27, + rdreg_op3 = 0x28, + membar_op3 = 0x28, + + flushw_op3 = 0x2b, + movcc_op3 = 0x2c, + sdivx_op3 = 0x2d, + popc_op3 = 0x2e, + movr_op3 = 0x2f, + + sir_op3 = 0x30, + wrreg_op3 = 0x30, + saved_op3 = 0x31, + + fpop1_op3 = 0x34, + fpop2_op3 = 0x35, + impdep1_op3 = 0x36, + addx_op3 = 0x36, + aes3_op3 = 0x36, + sha_op3 = 0x36, + bmask_op3 = 0x36, + bshuffle_op3 = 0x36, + alignaddr_op3 = 0x36, + faligndata_op3 = 0x36, + flog3_op3 = 0x36, + edge_op3 = 0x36, + fzero_op3 = 0x36, + fsrc_op3 = 0x36, + fnot_op3 = 0x36, + mpmul_op3 
= 0x36, + umulx_op3 = 0x36, + xmulx_op3 = 0x36, + crc32c_op3 = 0x36, + impdep2_op3 = 0x37, + stpartialf_op3 = 0x37, + jmpl_op3 = 0x38, + rett_op3 = 0x39, + trap_op3 = 0x3a, + flush_op3 = 0x3b, + save_op3 = 0x3c, + restore_op3 = 0x3d, + done_op3 = 0x3e, + retry_op3 = 0x3e, + + lduw_op3 = 0x00, + ldub_op3 = 0x01, + lduh_op3 = 0x02, + ldd_op3 = 0x03, + stw_op3 = 0x04, + stb_op3 = 0x05, + sth_op3 = 0x06, + std_op3 = 0x07, + ldsw_op3 = 0x08, + ldsb_op3 = 0x09, + ldsh_op3 = 0x0a, + ldx_op3 = 0x0b, + + stx_op3 = 0x0e, + swap_op3 = 0x0f, + + stwa_op3 = 0x14, + stxa_op3 = 0x1e, + + ldf_op3 = 0x20, + ldfsr_op3 = 0x21, + ldqf_op3 = 0x22, + lddf_op3 = 0x23, + stf_op3 = 0x24, + stfsr_op3 = 0x25, + stqf_op3 = 0x26, + stdf_op3 = 0x27, + + prefetch_op3 = 0x2d, + + casa_op3 = 0x3c, + casxa_op3 = 0x3e, + + mftoi_op3 = 0x36, + + alt_bit_op3 = 0x10, + cc_bit_op3 = 0x10 + }; + + enum opfs { + // selected opfs + edge8n_opf = 0x01, + + fmovs_opf = 0x01, + fmovd_opf = 0x02, + + fnegs_opf = 0x05, + fnegd_opf = 0x06, + + addxc_opf = 0x11, + addxccc_opf = 0x13, + umulxhi_opf = 0x16, + alignaddr_opf = 0x18, + bmask_opf = 0x19, + + fadds_opf = 0x41, + faddd_opf = 0x42, + fsubs_opf = 0x45, + fsubd_opf = 0x46, + + faligndata_opf = 0x48, + + fmuls_opf = 0x49, + fmuld_opf = 0x4a, + bshuffle_opf = 0x4c, + fdivs_opf = 0x4d, + fdivd_opf = 0x4e, + + fcmps_opf = 0x51, + fcmpd_opf = 0x52, + + fstox_opf = 0x81, + fdtox_opf = 0x82, + fxtos_opf = 0x84, + fxtod_opf = 0x88, + fitos_opf = 0xc4, + fdtos_opf = 0xc6, + fitod_opf = 0xc8, + fstod_opf = 0xc9, + fstoi_opf = 0xd1, + fdtoi_opf = 0xd2, + + mdtox_opf = 0x110, + mstouw_opf = 0x111, + mstosw_opf = 0x113, + xmulx_opf = 0x115, + xmulxhi_opf = 0x116, + mxtod_opf = 0x118, + mwtos_opf = 0x119, + + aes_kexpand0_opf = 0x130, + aes_kexpand2_opf = 0x131, + + sha1_opf = 0x141, + sha256_opf = 0x142, + sha512_opf = 0x143, + + crc32c_opf = 0x147, + mpmul_opf = 0x148 + }; + + enum op5s { + aes_eround01_op5 = 0x00, + aes_eround23_op5 = 0x01, + aes_dround01_op5 = 0x02, + 
aes_dround23_op5 = 0x03,
+    aes_eround01_l_op5 = 0x04,
+    aes_eround23_l_op5 = 0x05,
+    aes_dround01_l_op5 = 0x06,
+    aes_dround23_l_op5 = 0x07,
+    aes_kexpand1_op5 = 0x08
+  };
+
+  // Register-condition codes.  rcond() places the value in bits 12..10 of an
+  // instruction word; rc_last is the highest code defined here and is the
+  // value is_cbcond() compares the decoded cond field against.
+  enum RCondition { rc_z = 1, rc_lez = 2, rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez };
+
+  // Condition codes.  The floating-point names (f_*) and the integer names
+  // both use the values 0..15, and several names are plain aliases of one
+  // another (e.g. equal/zero, lessUnsigned/carrySet, notEqual/notZero).
+  enum Condition {
+    // for FBfcc & FBPfcc instruction
+    f_never = 0,
+    f_notEqual = 1,
+    f_notZero = 1,
+    f_lessOrGreater = 2,
+    f_unorderedOrLess = 3,
+    f_less = 4,
+    f_unorderedOrGreater = 5,
+    f_greater = 6,
+    f_unordered = 7,
+    f_always = 8,
+    f_equal = 9,
+    f_zero = 9,
+    f_unorderedOrEqual = 10,
+    f_greaterOrEqual = 11,
+    f_unorderedOrGreaterOrEqual = 12,
+    f_lessOrEqual = 13,
+    f_unorderedOrLessOrEqual = 14,
+    f_ordered = 15,
+
+    // for integers
+
+    never = 0,
+    equal = 1,
+    zero = 1,
+    lessEqual = 2,
+    less = 3,
+    lessEqualUnsigned = 4,
+    lessUnsigned = 5,
+    carrySet = 5,
+    negative = 6,
+    overflowSet = 7,
+    always = 8,
+    notEqual = 9,
+    notZero = 9,
+    greater = 10,
+    greaterEqual = 11,
+    greaterUnsigned = 12,
+    greaterEqualUnsigned = 13,
+    carryClear = 13,
+    positive = 14,
+    overflowClear = 15
+  };
+
+  // Condition-code register selectors.  Note that the integer selectors
+  // (icc, xcc) and the floating-point selectors (fcc0..fcc3) overlap
+  // numerically: icc == fcc0 and xcc == fcc2.
+  enum CC {
+    // ptr_cc is the correct condition code for a pointer or intptr_t:
+    icc = 0, xcc = 2, ptr_cc = xcc,
+    fcc0 = 0, fcc1 = 1, fcc2 = 2, fcc3 = 3
+  };
+
+  // Function codes accepted by the prefetch()/prefetcha() emitters.
+  enum PrefetchFcn {
+    severalReads = 0, oneRead = 1, severalWritesAndPossiblyReads = 2, oneWrite = 3, page = 4
+  };
+
+ public:
+  // Helper functions for groups of instructions
+
+  enum Predict { pt = 1, pn = 0 }; // pt = predict taken
+
+  // Bit masks for the membar() operand; each enumerator is a single bit, so
+  // values can be or-ed together by the caller.
+  enum Membar_mask_bits { // page 184, v9
+    StoreStore = 1 << 3,
+    LoadStore = 1 << 2,
+    StoreLoad = 1 << 1,
+    LoadLoad = 1 << 0,
+
+    Sync = 1 << 6,
+    MemIssue = 1 << 5,
+    Lookaside = 1 << 4
+  };
+
+  //---< calculate length of instruction >---
+  // With SPARC being a RISC architecture, this always is BytesPerInstWord
+  // instruction must start at passed address
+  // (the instr argument itself is never read).
+  static unsigned int instr_len(unsigned char *instr) { return BytesPerInstWord; }
+
+  //---< longest instructions >---
+  static unsigned int instr_maxlen() { return BytesPerInstWord; }
+
+  // Tests whether the byte distance from a to b (computed as b - a) passes
+  // is_simm() for nbits + 2 bits, i.e. nbits of word displacement plus the two
+  // low-order bits of a byte offset.
+  // NOTE(review): assumes is_simm(), defined elsewhere, is a signed-range
+  // test - confirm.
+  static bool is_in_wdisp_range(address a, address b, int nbits) {
+    intptr_t d = intptr_t(b) - intptr_t(a);
+    return is_simm(d, nbits + 2);
+  }
+
+  // Address to measure a branch distance against: the label's target when it
+  // is bound, otherwise the current pc().
+  address target_distance(Label &L) {
+    // Assembler::target(L) should be called only when
+    // a branch instruction is emitted since non-bound
+    // labels record current pc() as a branch address.
+    if (L.is_bound()) return target(L);
+    // Return current address for non-bound labels.
+    return pc();
+  }
+
+  // test if label is in simm16 range in words (wdisp16).
+  // The distance checked is pc() - target_distance(L); for a non-bound label
+  // both are pc(), so the distance is zero.
+  bool is_in_wdisp16_range(Label &L) {
+    return is_in_wdisp_range(target_distance(L), pc(), 16);
+  }
+  // test if the distance between two addresses fits in simm30 range in words
+  static bool is_in_wdisp30_range(address a, address b) {
+    return is_in_wdisp_range(a, b, 30);
+  }
+
+  // Address space identifiers, passed as the "ia" operand of the
+  // alternate-space load/store emitters declared further down.
+  enum ASIs { // page 72, v9
+    ASI_PRIMARY = 0x80,
+    ASI_PRIMARY_NOFAULT = 0x82,
+    ASI_PRIMARY_LITTLE = 0x88,
+    // 8x8-bit partial store
+    ASI_PST8_PRIMARY = 0xC0,
+    // Block initializing store
+    ASI_ST_BLKINIT_PRIMARY = 0xE2,
+    // Most-Recently-Used (MRU) BIS variant
+    ASI_ST_BLKINIT_MRU_PRIMARY = 0xF2
+    // add more from book as needed
+  };
+
+ protected:
+  // helpers
+
+  // x is supposed to fit in a field "nbits" wide
+  // and be sign-extended.  Check the range.
+
+  static void assert_signed_range(intptr_t x, int nbits) {
+    // A 32-bit field is accepted unchecked; for narrower fields x must lie in
+    // [-2^(nbits-1), 2^(nbits-1)).
+    assert(nbits == 32 || (-(1 << (nbits - 1)) <= x && x < (1 << (nbits - 1))),
+           "value out of range: x=" INTPTR_FORMAT ", nbits=%d", x, nbits);
+  }
+
+  // Range check for a byte displacement that is encoded in words: x must be
+  // 4-byte aligned and fit a signed field of nbits + 2 bits.
+  static void assert_signed_word_disp_range(intptr_t x, int nbits) {
+    assert((x & 3) == 0, "not word aligned");
+    assert_signed_range(x, nbits + 2);
+  }
+
+  // x, viewed as unsigned, must be below 2^nbits.
+  static void assert_unsigned_range(int x, int nbits) {
+    assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
+  }
+
+  // fields: note bits numbered from LSB = 0, fields known by inclusive bit range
+
+  // Returns a right-justified mask with one bit set for every bit of the
+  // field [hi_bit, lo_bit].
+  static int fmask(juint hi_bit, juint lo_bit) {
+    // lo_bit is unsigned, so only the ordering and the upper bound need checking.
+    assert(hi_bit >= lo_bit && hi_bit < 32, "bad bits");
+    const juint width = hi_bit - lo_bit + 1;
+    // Shifting by the full word width is undefined, so the 32-bit field is
+    // special-cased; the shift itself is done unsigned so a 31-bit field does
+    // not overflow a signed int.
+    return (width == 32) ? -1 : int((juint(1) << width) - 1);
+  }
+
+  // inverse of u_field
+  // Extracts the field [hi_bit, lo_bit] of x, zero-extended.
+
+  static int inv_u_field(int x, int hi_bit, int lo_bit) {
+    juint r = juint(x) >> lo_bit;
+    r &= fmask(hi_bit, lo_bit);
+    return int(r);
+  }
+
+  // signed version: extract from field and sign-extend
+  // The field is moved up so that hi_bit lands in bit 31 (done unsigned, so no
+  // signed-overflow on the left shift) and then shifted back down
+  // arithmetically past lo_bit, which replicates the field's top bit.  Masking
+  // the result with fmask(), as inv_u_field() does, would strip the sign bits
+  // again, so inv_u_field() must not be used here.
+
+  static int inv_s_field(int x, int hi_bit, int lo_bit) {
+    const int sign_shift = 31 - hi_bit;
+    return int(juint(x) << sign_shift) >> (sign_shift + lo_bit);
+  }
+
+  // given a field that ranges from hi_bit to lo_bit (inclusive,
+  // LSB = 0), and an unsigned value for the field,
+  // shift it into the field
+
+#ifdef ASSERT
+  static int u_field(int x, int hi_bit, int lo_bit) {
+    assert((x & ~fmask(hi_bit, lo_bit)) == 0,
+           "value out of range");
+    int r = x << lo_bit;
+    assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking");
+    return r;
+  }
+#else
+  // make sure this is inlined as it will reduce code size significantly
+  #define u_field(x, hi_bit, lo_bit) ((x) << (lo_bit))
+#endif
+
+  // Decoders for the fixed fields of an instruction word.
+  static int inv_op(int x) { return inv_u_field(x, 31, 30); }
+  static int inv_op2(int x) { return inv_u_field(x, 24, 22); }
+  static int inv_op3(int x) { return inv_u_field(x, 24, 19); }
+  static int inv_cond(int x) { return inv_u_field(x, 28, 25); }
+
+  // True when the immediate bit (bit 13, see immed()) is set in x.
+  static bool inv_immed(int x) { return (x & Assembler::immed(true)) != 0; }
+
+  static Register inv_rd(int x) { return as_Register(inv_u_field(x, 29, 25)); }
+  static Register inv_rs1(int x) { return as_Register(inv_u_field(x, 18, 14)); }
+  static Register inv_rs2(int x) { return as_Register(inv_u_field(x, 4, 0)); }
+
+  // Encoders: each returns its argument shifted into the named field, ready
+  // to be or-ed into an instruction word.
+  static int op(int x) { return u_field(x, 31, 30); }
+  static int rd(Register r) { return u_field(r->encoding(), 29, 25); }
+  static int fcn(int x) { return u_field(x, 29, 25); }
+  static int op3(int x) { return u_field(x, 24, 19); }
+  static int rs1(Register r) { return u_field(r->encoding(), 18, 14); }
+  static int rs2(Register r) { return u_field(r->encoding(), 4, 0); }
+  static int annul(bool a) { return u_field(a ? 1 : 0, 29, 29); }
+  static int cond(int x) { return u_field(x, 28, 25); }
+  static int cond_mov(int x) { return u_field(x, 17, 14); }
+  static int rcond(RCondition x) { return u_field(x, 12, 10); }
+  static int op2(int x) { return u_field(x, 24, 22); }
+  static int predict(bool p) { return u_field(p ? 1 : 0, 19, 19); }
+  static int branchcc(CC fcca) { return u_field(fcca, 21, 20); }
+  static int cmpcc(CC fcca) { return u_field(fcca, 26, 25); }
+  static int imm_asi(int x) { return u_field(x, 12, 5); }
+  static int immed(bool i) { return u_field(i ? 1 : 0, 13, 13); }
+  static int opf_low6(int w) { return u_field(w, 10, 5); }
+  static int opf_low5(int w) { return u_field(w, 9, 5); }
+  static int op5(int x) { return u_field(x, 8, 5); }
+  static int trapcc(CC cc) { return u_field(cc, 12, 11); }
+  static int sx(int i) { return u_field(i, 12, 12); } // shift x=1 means 64-bit
+  static int opf(int x) { return u_field(x, 13, 5); }
+
+  // Instruction classification.  Each predicate takes a raw instruction word.
+
+  // A cbcond shares op/op2 with bpr; it is told apart by a cond field above
+  // rc_last.  Always false when the CPU has no CBCOND support.
+  static bool is_cbcond(int x) {
+    return (VM_Version::has_cbcond() && (inv_cond(x) > rc_last) &&
+            inv_op(x) == branch_op && inv_op2(x) == bpr_op2);
+  }
+  // Tests bit 21 of a cbcond instruction.
+  static bool is_cxb(int x) {
+    assert(is_cbcond(x), "wrong instruction");
+    return (x & (1 << 21)) != 0;
+  }
+  static bool is_branch(int x) {
+    if (inv_op(x) != Assembler::branch_op) return false;
+
+    // Decode op2 once and test it against every branch family.
+    const int op2_bits = inv_op2(x);
+    return op2_bits == Assembler::bpr_op2 ||
+           op2_bits == Assembler::bp_op2  ||
+           op2_bits == Assembler::br_op2  ||
+           op2_bits == Assembler::fb_op2  ||
+           op2_bits == Assembler::fbp_op2;
+  }
+  static bool is_call(int x) {
+    return inv_op(x) == Assembler::call_op;
+  }
+  static bool is_jump(int x) {
+    if (inv_op(x) != Assembler::arith_op) return false;
+
+    const int op3_bits = inv_op3(x);
+    return op3_bits == Assembler::jmpl_op3 || op3_bits == Assembler::rett_op3;
+  }
+  // A register read (rdreg_op3) whose rs1 field, bits 18..14, holds 5.
+  static bool is_rdpc(int x) {
+    return (inv_op(x) == Assembler::arith_op && inv_op3(x) == Assembler::rdreg_op3 &&
+            inv_u_field(x, 18, 14) == 5);
+  }
+  static bool is_cti(int x) {
+    return is_branch(x) || is_call(x) || is_jump(x); // Ignoring done/retry
+  }
+
+  // cbcond splits its 4-bit condition: the low three bits go to bits 27..25,
+  // bit 3 of the condition goes to bit 29, and bit 28 is always set.
+  // inv_cond_cbcond() reassembles the condition from bits 27..25 and bit 29.
+  static int cond_cbcond(int x) { return u_field((((x & 8) << 1) + 8 + (x & 7)), 29, 25); }
+  static int inv_cond_cbcond(int x) {
+    assert(is_cbcond(x), "wrong instruction");
+    return inv_u_field(x, 27, 25) | (inv_u_field(x, 29, 29) << 3);
+  }
+
+  // Condition-code selector fields; for the integer case (useFloat == false)
+  // opf_cc adds 4 to the selector and mov_cc sets bit 18.
+  static int opf_cc(CC c, bool useFloat) { return u_field((useFloat ? 0 : 4) + c, 13, 11); }
+  static int mov_cc(CC c, bool useFloat) { return u_field(useFloat ? 0 : 1, 18, 18) | u_field(c, 12, 11); }
+
+  // Float register fields; the register number is encoded for the given width.
+  static int fd(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 29, 25); }
+  static int fs1(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 18, 14); }
+  static int fs2(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 4, 0); }
+  static int fs3(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 13, 9); }
+
+  // some float instructions use this encoding on the op3 field
+  // The width selects an offset from the base opcode: S +0, D +3, Q +2.
+  static int alt_op3(int op, FloatRegisterImpl::Width w) {
+    int r = op;  // S uses the base opcode; also keeps r defined on the default path
+    switch(w) {
+     case FloatRegisterImpl::S: break;
+     case FloatRegisterImpl::D: r += 3; break;
+     case FloatRegisterImpl::Q: r += 2; break;
+     default: ShouldNotReachHere(); break;
+    }
+    return op3(r);
+  }
+
+  // compute inverse of simm
+  // Sign-extends the low nbits bits of x.
+  static int inv_simm(int x, int nbits) {
+    return (int)(x << (32 - nbits)) >> (32 - nbits);
+  }
+
+  static int inv_simm13(int x) { return inv_simm(x, 13); }
+
+  // signed immediate, in low bits, nbits long
+  static int simm(int x, int nbits) {
+    assert_signed_range(x, nbits);
+    return x & ((1 << nbits) - 1);
+  }
+
+  // unsigned immediate, in low bits, at most nbits long.
+  static int uimm(int x, int nbits) {
+    assert_unsigned_range(x, nbits);
+    return x & ((1 << nbits) - 1);
+  }
+
+  // compute inverse of wdisp16
+  // lo is the low 14 bits of x, hi the two bits at 21..20.  A hi of 2 or 3 has
+  // its top bit set, so it is sign-extended before the pieces are joined; the
+  // joined word displacement is scaled to bytes and added to pos.
+  static intptr_t inv_wdisp16(int x, intptr_t pos) {
+    int lo = x & ((1 << 14) - 1);
+    int hi = (x >> 20) & 3;
+    if (hi >= 2) hi |= ~1;
+    return (((hi << 14) | lo) << 2) + pos;
+  }
+
+  // word offset, 14 bits at LSend, 2 bits at B21, B20
+  // x is the target and off the position of the instruction; the displacement
+  // x - off is checked, split into the two fields, and verified by decoding it
+  // again with inv_wdisp16().
+  static int wdisp16(intptr_t x, intptr_t off) {
+    intptr_t xx = x - off;
+    assert_signed_word_disp_range(xx, 16);
+    int r = (xx >> 2) & ((1 << 14) - 1) | (((xx >> (2+14)) & 3) << 20);
+    assert(inv_wdisp16(r, off) == x, "inverse is not inverse");
+    return r;
+  }
+
+  // compute inverse of wdisp10
+  // Same scheme as inv_wdisp16(): lo is the 8 bits at 12..5, hi the two bits
+  // at 20..19, sign-extended when its top bit is set.
+  static intptr_t inv_wdisp10(int x, intptr_t pos) {
+    assert(is_cbcond(x), "wrong instruction");
+    int lo = inv_u_field(x, 12, 5);
+    int hi = (x >> 19) & 3;
+    if (hi >= 2) hi |= ~1;
+    return (((hi << 8) | lo) << 2) + pos;
+  }
+
+  // word offset for cbcond, 8 bits at [B12,B5], 2 bits at [B20,B19]
+  static int wdisp10(intptr_t x, intptr_t off) {
+    assert(VM_Version::has_cbcond(), "This CPU does not have CBCOND instruction");
+    intptr_t xx = x - off;
+    assert_signed_word_disp_range(xx, 10);
+    int r = (((xx >> 2) & ((1 << 8) - 1)) << 5) | (((xx >> (2+8)) & 3) << 19);
+    // Have to fake cbcond instruction to pass assert in inv_wdisp10()
+    assert(inv_wdisp10((r | op(branch_op) | cond_cbcond(rc_last+1) | op2(bpr_op2)), off) == x, "inverse is not inverse");
+    return r;
+  }
+
+  // word displacement in low-order nbits bits
+
+  // Decodes the low nbits bits of x as a signed word displacement and returns
+  // the byte address it denotes relative to pos.
+  static intptr_t inv_wdisp(int x, intptr_t pos, int nbits) {
+    int pre_sign_extend = x & ((1 << nbits) - 1);
+    // If the top bit of the field is set, fill all higher bits with ones.
+    int r = (pre_sign_extend >= (1 << (nbits - 1)) ?
+             pre_sign_extend | ~((1 << nbits) - 1) : pre_sign_extend);
+    return (r << 2) + pos;
+  }
+
+  // Encodes the byte distance x - off as an nbits-wide word displacement and
+  // verifies the result by decoding it again with inv_wdisp().
+  static int wdisp(intptr_t x, intptr_t off, int nbits) {
+    intptr_t xx = x - off;
+    assert_signed_word_disp_range(xx, nbits);
+    int r = (xx >> 2) & ((1 << nbits) - 1);
+    assert(inv_wdisp(r, off, nbits) == x, "inverse not inverse");
+    return r;
+  }
+
+
+  // Extract the top 32 bits in a 64 bit word
+  // (the shift is done on the unsigned value).
+  static int32_t hi32(int64_t x) {
+    int32_t r = int32_t((uint64_t)x >> 32);
+    return r;
+  }
+
+  // given a sethi instruction, extract the constant, left-justified
+  static int inv_hi22(int x) {
+    return x << 10;
+  }
+
+  // create an imm22 field, given a 32-bit left-justified constant
+  // The shift is done unsigned, so the result always fits in 22 bits; the
+  // assert merely confirms that.
+  static int hi22(int x) {
+    int r = int(juint(x) >> 10);
+    assert((r & ~((1 << 22) - 1)) == 0, "just checkin'");
+    return r;
+  }
+
+  // create a low10 __value__ (not a field) for a given 32-bit constant
+  static int low10(int x) {
+    return x & ((1 << 10) - 1);
+  }
+
+  // create a low12 __value__ (not a field) for a given 32-bit constant
+  static int low12(int x) {
+    return x & ((1 << 12) - 1);
+  }
+
+  // The *_only() guards below assert that the running CPU reports the named
+  // feature through VM_Version; they compute nothing.
+
+  // AES crypto instructions supported only on certain processors
+  static void aes_only() { assert(VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
+
+  // SHA crypto instructions supported only on certain processors
+  static void sha1_only() { assert(VM_Version::has_sha1(), "This instruction only works on SPARC with SHA1"); }
+  static void sha256_only() { assert(VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); }
+  static void sha512_only() { assert(VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); }
+
+  // CRC32C instruction supported only on certain processors
+  static void crc32c_only() { assert(VM_Version::has_crc32c(), "This instruction only works on SPARC with CRC32C"); }
+
+  // FMAf instructions supported only on certain processors
+  static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
+
+  // MPMUL instruction supported only on certain processors
+  static void mpmul_only() { assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL"); }
+
+  // instruction only in VIS1
+  static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
+
+  // instruction only in VIS2
+  static void vis2_only() { assert(VM_Version::has_vis2(), "This instruction only works on SPARC with VIS2"); }
+
+  // instruction only in VIS3
+  static void vis3_only() { assert(VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); }
+
+  // instruction deprecated in v9
+  static void v9_dep() { } // do nothing for now
+
+ protected:
+  // Pipeline validation is compiled in exactly when assertions are.
+#ifdef ASSERT
+#define VALIDATE_PIPELINE
+#endif
+
+#ifdef VALIDATE_PIPELINE
+  // A simple delay-slot scheme:
+  // In order to check the programmer, the assembler keeps track of delay-slots.
+  // It forbids CTIs in delay-slots (conservative, but should be OK). Also, when
+  // emitting an instruction into a delay-slot, you must do so using delayed(),
+  // e.g. asm->delayed()->add(...), in order to check that you do not omit the
+  // delay-slot instruction. To implement this, we use a simple FSA.
+  enum { NoDelay, AtDelay, FillDelay } _delay_state;
+
+  // A simple hazard scheme:
+  // In order to avoid pipeline stalls, due to single cycle pipeline hazards, we
+  // adopt a simplistic state tracking mechanism that will enforce an additional
+  // 'nop' instruction to be inserted prior to emitting an instruction that can
+  // expose a given hazard (currently, PC-related hazards only).
+  enum { NoHazard, PcHazard } _hazard_state;
+#endif
+
+ public:
+  // Tell the assembler that the next instruction must NOT be in delay-slot.
+  // Use at start of multi-instruction macros.
+  void assert_not_delayed() {
+    // This is a separate entry to avoid the creation of string constants in
+    // non-asserted code, with some compilers this pollutes the object code.
+#ifdef VALIDATE_PIPELINE
+    assert_no_delay("Next instruction should not be in a delay-slot.");
+#endif
+  }
+
+ protected:
+  // Asserts that no delay-slot is pending; msg is reported verbatim on failure.
+  void assert_no_delay(const char* msg) {
+#ifdef VALIDATE_PIPELINE
+    // assert() takes a printf-style format, so the caller's text is passed as
+    // an argument rather than as the format itself.
+    assert(_delay_state == NoDelay, "%s", msg);
+#endif
+  }
+
+  // Asserts that no pipeline hazard is currently recorded.
+  void assert_no_hazard() {
+#ifdef VALIDATE_PIPELINE
+    assert(_hazard_state == NoHazard, "Unsolicited pipeline hazard.");
+#endif
+  }
+
+ private:
+  // Returns the instruction word emitted immediately before pc().
+  // Must only be called once something has been emitted (offset() > 0).
+  inline int32_t prev_insn() {
+    assert(offset() > 0, "Interface violation.");
+    const int32_t* prev = (int32_t*)pc() - 1;
+    return *prev;
+  }
+
+#ifdef VALIDATE_PIPELINE
+  void validate_no_pipeline_hazards();
+#endif
+
+ protected:
+  // Avoid possible pipeline stall by inserting an additional 'nop' instruction,
+  // if the previous instruction is a 'cbcond' or a 'rdpc'.
+  inline void avoid_pipeline_stall();
+
+  // A call to cti() is made before emitting a control-transfer instruction (CTI)
+  // in order to assert a CTI is not emitted right after a 'cbcond', nor in the
+  // delay-slot of another CTI. Only effective when assertions are enabled.
+  void cti() {
+    // A 'cbcond' or 'rdpc' instruction immediately followed by a CTI introduces
+    // a pipeline stall, which we make sure to prohibit.
+    assert_no_cbcond_before();
+    assert_no_rdpc_before();
+#ifdef VALIDATE_PIPELINE
+    assert_no_hazard();
+    assert_no_delay("CTI in delay-slot.");
+#endif
+  }
+
+  // Called when emitting CTI with a delay-slot, AFTER emitting.
+  // Moves the delay-slot state machine from NoDelay to AtDelay.
+  inline void induce_delay_slot() {
+#ifdef VALIDATE_PIPELINE
+    assert_no_delay("Already in delay-slot.");
+    _delay_state = AtDelay;
+#endif
+  }
+
+  // Records a PC-related hazard; none may be pending already.
+  inline void induce_pc_hazard() {
+#ifdef VALIDATE_PIPELINE
+    assert_no_hazard();
+    _hazard_state = PcHazard;
+#endif
+  }
+
+  // True when something has been emitted and the last instruction is a cbcond.
+  bool is_cbcond_before() { return offset() > 0 && is_cbcond(prev_insn()); }
+
+  // True when something has been emitted and the last instruction is a rdpc.
+  bool is_rdpc_before() { return offset() > 0 && is_rdpc(prev_insn()); }
+
+  void assert_no_cbcond_before() {
+    assert(offset() == 0 || !is_cbcond_before(), "CBCOND should not be followed by CTI.");
+  }
+
+  void assert_no_rdpc_before() {
+    assert(offset() == 0 || !is_rdpc_before(), "RDPC should not be followed by CTI.");
+  }
+
+ public:
+
+  // Decides whether a branch to L may be emitted as a cbcond: the UseCBCond
+  // flag must be on, the previous instruction must not itself be a cbcond, and
+  // the byte distance from pc() to the label must pass is_simm12().
+  bool use_cbcond(Label &L) {
+    if (!UseCBCond || is_cbcond_before()) return false;
+    const intptr_t dist = intptr_t(target_distance(L)) - intptr_t(pc());
+    assert((dist & 3) == 0, "not word aligned");
+    return is_simm12(dist);
+  }
+
+  // Tells assembler you know that next instruction is delayed
+  // Returns this, so the delay-slot instruction can be chained onto the call.
+  Assembler* delayed() {
+#ifdef VALIDATE_PIPELINE
+    assert(_delay_state == AtDelay, "Delayed instruction not in delay-slot.");
+    _delay_state = FillDelay;
+#endif
+    return this;
+  }
+
+  // Checks that no delay-slot is left unfilled (and, with COMPILER2, that no
+  // pipeline hazard remains) before handing over to AbstractAssembler::flush().
+  void flush() {
+#ifdef VALIDATE_PIPELINE
+    assert(_delay_state == NoDelay, "Ending code with a delay-slot.");
+#ifdef COMPILER2
+    validate_no_pipeline_hazards();
+#endif
+#endif
+    AbstractAssembler::flush();
+  }
+
+  inline void emit_int32(int32_t);  // shadows AbstractAssembler::emit_int32
+  inline void emit_data(int32_t);
+  inline void emit_data(int32_t, RelocationHolder const&);
+  inline void emit_data(int32_t, relocInfo::relocType rtype);
+
+  // Helper for the above functions.
+ inline void check_delay(); + + + public: + // instructions, refer to page numbers in the SPARC Architecture Manual, V9 + + // pp 135 + + inline void add(Register s1, Register s2, Register d); + inline void add(Register s1, int simm13a, Register d); + + inline void addcc(Register s1, Register s2, Register d); + inline void addcc(Register s1, int simm13a, Register d); + inline void addc(Register s1, Register s2, Register d); + inline void addc(Register s1, int simm13a, Register d); + inline void addccc(Register s1, Register s2, Register d); + inline void addccc(Register s1, int simm13a, Register d); + + + // 4-operand AES instructions + + inline void aes_eround01(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_eround23(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_dround01(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_dround23(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_eround01_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_eround23_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_dround01_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_dround23_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void aes_kexpand1(FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d); + + + // 3-operand AES instructions + + inline void aes_kexpand0(FloatRegister s1, FloatRegister s2, FloatRegister d); + inline void aes_kexpand2(FloatRegister s1, FloatRegister s2, FloatRegister d); + + // pp 136 + + inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none); + inline void bpr(RCondition c, bool a, Predict p, Register s1, Label &L); + + // compare and branch + inline void 
cbcond(Condition c, CC cc, Register s1, Register s2, Label &L); + inline void cbcond(Condition c, CC cc, Register s1, int simm5, Label &L); + + protected: // use MacroAssembler::br instead + + // pp 138 + + inline void fb(Condition c, bool a, address d, relocInfo::relocType rt = relocInfo::none); + inline void fb(Condition c, bool a, Label &L); + + // pp 141 + + inline void fbp(Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none); + inline void fbp(Condition c, bool a, CC cc, Predict p, Label &L); + + // pp 144 + + inline void br(Condition c, bool a, address d, relocInfo::relocType rt = relocInfo::none); + inline void br(Condition c, bool a, Label &L); + + // pp 146 + + inline void bp(Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none); + inline void bp(Condition c, bool a, CC cc, Predict p, Label &L); + + // pp 149 + + inline void call(address d, relocInfo::relocType rt = relocInfo::runtime_call_type); + inline void call(Label &L, relocInfo::relocType rt = relocInfo::runtime_call_type); + + inline void call(address d, RelocationHolder const &rspec); + + public: + + // pp 150 + + // These instructions compare the contents of s2 with the contents of + // memory at address in s1. If the values are equal, the contents of memory + // at address s1 is swapped with the data in d. If the values are not equal, + // the contents of memory at s1 is loaded into d, without the swap.
+ + inline void casa(Register s1, Register s2, Register d, int ia = -1); + inline void casxa(Register s1, Register s2, Register d, int ia = -1); + + // pp 152 + + inline void udiv(Register s1, Register s2, Register d); + inline void udiv(Register s1, int simm13a, Register d); + inline void sdiv(Register s1, Register s2, Register d); + inline void sdiv(Register s1, int simm13a, Register d); + inline void udivcc(Register s1, Register s2, Register d); + inline void udivcc(Register s1, int simm13a, Register d); + inline void sdivcc(Register s1, Register s2, Register d); + inline void sdivcc(Register s1, int simm13a, Register d); + + // pp 155 + + inline void done(); + inline void retry(); + + // pp 156 + + inline void fadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d); + inline void fsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d); + + // pp 157 + + inline void fcmp(FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2); + inline void fcmpe(FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2); + + // pp 159 + + inline void ftox(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + inline void ftoi(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + + // pp 160 + + inline void ftof(FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s, FloatRegister d); + + // pp 161 + + inline void fxtof(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + inline void fitof(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + + // pp 162 + + inline void fmov(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + + inline void fneg(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + + inline void fabs(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + + // pp 163 + + inline void fmul(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d); + inline void 
fmul(FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d); + inline void fdiv(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d); + + // FXORs/FXORd instructions + + inline void fxor(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d); + + // pp 164 + + inline void fsqrt(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + + // fmaf instructions. + + inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + + inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d); + + // pp 165 + + inline void flush(Register s1, Register s2); + inline void flush(Register s1, int simm13a); + + // pp 167 + + void flushw(); + + // pp 168 + + void illtrap(int const22a); + + // pp 169 + + void impdep1(int id1, int const19a); + void impdep2(int id1, int const19a); + + // pp 170 + + void jmpl(Register s1, Register s2, Register d); + void jmpl(Register s1, int simm13a, Register d, + RelocationHolder const &rspec = RelocationHolder()); + + // 171 + + inline void ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d); + inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, + RelocationHolder const &rspec = RelocationHolder()); + + inline void ldd(Register s1, Register s2, FloatRegister d); + inline void ldd(Register s1, int simm13a, FloatRegister d); + + inline void ldfsr(Register s1, Register s2); + inline void ldfsr(Register s1, int simm13a); + inline void ldxfsr(Register s1, Register s2); + inline void ldxfsr(Register s1, int simm13a); + + // 173 + + 
inline void ldfa(FloatRegisterImpl::Width w, Register s1, Register s2, int ia, FloatRegister d); + inline void ldfa(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d); + + // pp 175 + + inline void ldsb(Register s1, Register s2, Register d); + inline void ldsb(Register s1, int simm13a, Register d); + inline void ldsh(Register s1, Register s2, Register d); + inline void ldsh(Register s1, int simm13a, Register d); + inline void ldsw(Register s1, Register s2, Register d); + inline void ldsw(Register s1, int simm13a, Register d); + inline void ldub(Register s1, Register s2, Register d); + inline void ldub(Register s1, int simm13a, Register d); + inline void lduh(Register s1, Register s2, Register d); + inline void lduh(Register s1, int simm13a, Register d); + inline void lduw(Register s1, Register s2, Register d); + inline void lduw(Register s1, int simm13a, Register d); + inline void ldx(Register s1, Register s2, Register d); + inline void ldx(Register s1, int simm13a, Register d); + + // pp 177 + + inline void ldsba(Register s1, Register s2, int ia, Register d); + inline void ldsba(Register s1, int simm13a, Register d); + inline void ldsha(Register s1, Register s2, int ia, Register d); + inline void ldsha(Register s1, int simm13a, Register d); + inline void ldswa(Register s1, Register s2, int ia, Register d); + inline void ldswa(Register s1, int simm13a, Register d); + inline void lduba(Register s1, Register s2, int ia, Register d); + inline void lduba(Register s1, int simm13a, Register d); + inline void lduha(Register s1, Register s2, int ia, Register d); + inline void lduha(Register s1, int simm13a, Register d); + inline void lduwa(Register s1, Register s2, int ia, Register d); + inline void lduwa(Register s1, int simm13a, Register d); + inline void ldxa(Register s1, Register s2, int ia, Register d); + inline void ldxa(Register s1, int simm13a, Register d); + + // pp 181 + + inline void and3(Register s1, Register s2, Register d); + inline void 
and3(Register s1, int simm13a, Register d); + inline void andcc(Register s1, Register s2, Register d); + inline void andcc(Register s1, int simm13a, Register d); + inline void andn(Register s1, Register s2, Register d); + inline void andn(Register s1, int simm13a, Register d); + inline void andncc(Register s1, Register s2, Register d); + inline void andncc(Register s1, int simm13a, Register d); + inline void or3(Register s1, Register s2, Register d); + inline void or3(Register s1, int simm13a, Register d); + inline void orcc(Register s1, Register s2, Register d); + inline void orcc(Register s1, int simm13a, Register d); + inline void orn(Register s1, Register s2, Register d); + inline void orn(Register s1, int simm13a, Register d); + inline void orncc(Register s1, Register s2, Register d); + inline void orncc(Register s1, int simm13a, Register d); + inline void xor3(Register s1, Register s2, Register d); + inline void xor3(Register s1, int simm13a, Register d); + inline void xorcc(Register s1, Register s2, Register d); + inline void xorcc(Register s1, int simm13a, Register d); + inline void xnor(Register s1, Register s2, Register d); + inline void xnor(Register s1, int simm13a, Register d); + inline void xnorcc(Register s1, Register s2, Register d); + inline void xnorcc(Register s1, int simm13a, Register d); + + // pp 183 + + inline void membar(Membar_mask_bits const7a); + + // pp 185 + + inline void fmov(FloatRegisterImpl::Width w, Condition c, bool floatCC, CC cca, FloatRegister s2, FloatRegister d); + + // pp 189 + + inline void fmov(FloatRegisterImpl::Width w, RCondition c, Register s1, FloatRegister s2, FloatRegister d); + + // pp 191 + + inline void movcc(Condition c, bool floatCC, CC cca, Register s2, Register d); + inline void movcc(Condition c, bool floatCC, CC cca, int simm11a, Register d); + + // pp 195 + + inline void movr(RCondition c, Register s1, Register s2, Register d); + inline void movr(RCondition c, Register s1, int simm10a, Register d); + + // 
pp 196 + + inline void mulx(Register s1, Register s2, Register d); + inline void mulx(Register s1, int simm13a, Register d); + inline void sdivx(Register s1, Register s2, Register d); + inline void sdivx(Register s1, int simm13a, Register d); + inline void udivx(Register s1, Register s2, Register d); + inline void udivx(Register s1, int simm13a, Register d); + + // pp 197 + + inline void umul(Register s1, Register s2, Register d); + inline void umul(Register s1, int simm13a, Register d); + inline void smul(Register s1, Register s2, Register d); + inline void smul(Register s1, int simm13a, Register d); + inline void umulcc(Register s1, Register s2, Register d); + inline void umulcc(Register s1, int simm13a, Register d); + inline void smulcc(Register s1, Register s2, Register d); + inline void smulcc(Register s1, int simm13a, Register d); + + // pp 201 + + inline void nop(); + + inline void sw_count(); + + // pp 202 + + inline void popc(Register s, Register d); + inline void popc(int simm13a, Register d); + + // pp 203 + + inline void prefetch(Register s1, Register s2, PrefetchFcn f); + inline void prefetch(Register s1, int simm13a, PrefetchFcn f); + + inline void prefetcha(Register s1, Register s2, int ia, PrefetchFcn f); + inline void prefetcha(Register s1, int simm13a, PrefetchFcn f); + + // pp 208 + + // not implementing read privileged register + + inline void rdy(Register d); + inline void rdccr(Register d); + inline void rdasi(Register d); + inline void rdtick(Register d); + inline void rdpc(Register d); + inline void rdfprs(Register d); + + // pp 213 + + inline void rett(Register s1, Register s2); + inline void rett(Register s1, int simm13a, relocInfo::relocType rt = relocInfo::none); + + // pp 214 + + inline void save(Register s1, Register s2, Register d); + inline void save(Register s1, int simm13a, Register d); + + inline void restore(Register s1 = G0, Register s2 = G0, Register d = G0); + inline void restore(Register s1, int simm13a, Register d); + + // 
pp 216 + + inline void saved(); + inline void restored(); + + // pp 217 + + inline void sethi(int imm22a, Register d, RelocationHolder const &rspec = RelocationHolder()); + + // pp 218 + + inline void sll(Register s1, Register s2, Register d); + inline void sll(Register s1, int imm5a, Register d); + inline void srl(Register s1, Register s2, Register d); + inline void srl(Register s1, int imm5a, Register d); + inline void sra(Register s1, Register s2, Register d); + inline void sra(Register s1, int imm5a, Register d); + + inline void sllx(Register s1, Register s2, Register d); + inline void sllx(Register s1, int imm6a, Register d); + inline void srlx(Register s1, Register s2, Register d); + inline void srlx(Register s1, int imm6a, Register d); + inline void srax(Register s1, Register s2, Register d); + inline void srax(Register s1, int imm6a, Register d); + + // pp 220 + + inline void sir(int simm13a); + + // pp 221 + + inline void stbar(); + + // pp 222 + + inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2); + inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a); + + inline void std(FloatRegister d, Register s1, Register s2); + inline void std(FloatRegister d, Register s1, int simm13a); + + inline void stfsr(Register s1, Register s2); + inline void stfsr(Register s1, int simm13a); + inline void stxfsr(Register s1, Register s2); + inline void stxfsr(Register s1, int simm13a); + + // pp 224 + + inline void stfa(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2, int ia); + inline void stfa(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a); + + // pp 226 + + inline void stb(Register d, Register s1, Register s2); + inline void stb(Register d, Register s1, int simm13a); + inline void sth(Register d, Register s1, Register s2); + inline void sth(Register d, Register s1, int simm13a); + inline void stw(Register d, Register s1, Register s2); + inline void 
stw(Register d, Register s1, int simm13a); + inline void stx(Register d, Register s1, Register s2); + inline void stx(Register d, Register s1, int simm13a); + + // pp 177 + + inline void stba(Register d, Register s1, Register s2, int ia); + inline void stba(Register d, Register s1, int simm13a); + inline void stha(Register d, Register s1, Register s2, int ia); + inline void stha(Register d, Register s1, int simm13a); + inline void stwa(Register d, Register s1, Register s2, int ia); + inline void stwa(Register d, Register s1, int simm13a); + inline void stxa(Register d, Register s1, Register s2, int ia); + inline void stxa(Register d, Register s1, int simm13a); + inline void stda(Register d, Register s1, Register s2, int ia); + inline void stda(Register d, Register s1, int simm13a); + + // pp 230 + + inline void sub(Register s1, Register s2, Register d); + inline void sub(Register s1, int simm13a, Register d); + + inline void subcc(Register s1, Register s2, Register d); + inline void subcc(Register s1, int simm13a, Register d); + inline void subc(Register s1, Register s2, Register d); + inline void subc(Register s1, int simm13a, Register d); + inline void subccc(Register s1, Register s2, Register d); + inline void subccc(Register s1, int simm13a, Register d); + + // pp 231 + + inline void swap(Register s1, Register s2, Register d); + inline void swap(Register s1, int simm13a, Register d); + + // pp 232 + + inline void swapa(Register s1, Register s2, int ia, Register d); + inline void swapa(Register s1, int simm13a, Register d); + + // pp 234, note op in book is wrong, see pp 268 + + inline void taddcc(Register s1, Register s2, Register d); + inline void taddcc(Register s1, int simm13a, Register d); + + // pp 235 + + inline void tsubcc(Register s1, Register s2, Register d); + inline void tsubcc(Register s1, int simm13a, Register d); + + // pp 237 + + inline void trap(Condition c, CC cc, Register s1, Register s2); + inline void trap(Condition c, CC cc, Register s1, 
int trapa); + // simple uncond. trap + inline void trap(int trapa); + + // pp 239 omit write priv register for now + + inline void wry(Register d); + inline void wrccr(Register s); + inline void wrccr(Register s, int simm13a); + inline void wrasi(Register d); + // wrasi(d, imm) stores (d xor imm) to asi + inline void wrasi(Register d, int simm13a); + inline void wrfprs(Register d); + + // VIS1 instructions + + inline void alignaddr(Register s1, Register s2, Register d); + + inline void faligndata(FloatRegister s1, FloatRegister s2, FloatRegister d); + + inline void fzero(FloatRegisterImpl::Width w, FloatRegister d); + + inline void fsrc2(FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d); + + inline void fnot1(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister d); + + inline void fpmerge(FloatRegister s1, FloatRegister s2, FloatRegister d); + + inline void stpartialf(Register s1, Register s2, FloatRegister d, int ia = -1); + + // VIS2 instructions + + inline void edge8n(Register s1, Register s2, Register d); + + inline void bmask(Register s1, Register s2, Register d); + inline void bshuffle(FloatRegister s1, FloatRegister s2, FloatRegister d); + + // VIS3 instructions + + inline void addxc(Register s1, Register s2, Register d); + inline void addxccc(Register s1, Register s2, Register d); + + inline void movstosw(FloatRegister s, Register d); + inline void movstouw(FloatRegister s, Register d); + inline void movdtox(FloatRegister s, Register d); + + inline void movwtos(Register s, FloatRegister d); + inline void movxtod(Register s, FloatRegister d); + + inline void xmulx(Register s1, Register s2, Register d); + inline void xmulxhi(Register s1, Register s2, Register d); + inline void umulxhi(Register s1, Register s2, Register d); + + // Crypto SHA instructions + + inline void sha1(); + inline void sha256(); + inline void sha512(); + + // CRC32C instruction + + inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d); + + // MPMUL 
instruction + + inline void mpmul(int uimm5); + + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) { +#ifdef VALIDATE_PIPELINE + _delay_state = NoDelay; + _hazard_state = NoHazard; +#endif + } +}; + +#endif // CPU_SPARC_ASSEMBLER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp b/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp --- a/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp 2023-04-16 11:42:11.055935244 +0000 @@ -0,0 +1,1130 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_SPARC_ASSEMBLER_SPARC_INLINE_HPP
+#define CPU_SPARC_ASSEMBLER_SPARC_INLINE_HPP
+
+#include "asm/assembler.hpp"
+
+// Emits a NOP if the previously emitted instruction is a CBCOND or RDPC, so the caller's next instruction does not directly follow it.
+inline void Assembler::avoid_pipeline_stall() {
+#ifdef VALIDATE_PIPELINE
+  if (_hazard_state == PcHazard) {  // a PC-hazard is being tracked: validate it, pad with a NOP, and clear it
+    assert(is_cbcond_before() || is_rdpc_before(), "PC-hazard not preceded by CBCOND or RDPC.");
+    assert_no_delay("Must not have PC-hazard state in delay-slot.");
+    nop();
+    _hazard_state = NoHazard;
+  }
+#endif
+
+  bool post_cond = is_cbcond_before();  // sampled after the validation block above may already have padded
+  bool post_rdpc = is_rdpc_before();
+
+  if (post_cond || post_rdpc) {
+    nop();
+#ifdef VALIDATE_PIPELINE
+    if (_hazard_state != PcHazard) {  // padding was needed although no PC-hazard was tracked: report which instruction caused it
+      assert(!post_cond, "CBCOND before when no hazard @0x%p\n", pc());  // fires only when a CBCOND really precedes
+      assert(!post_rdpc, "RDPC before when no hazard @0x%p\n", pc());    // fires only when an RDPC really precedes
+    }
+#endif
+  }
+}
+
+inline void Assembler::check_delay() {  // delay-slot bookkeeping; compiles to nothing unless VALIDATE_PIPELINE is defined
+#ifdef VALIDATE_PIPELINE
+  guarantee(_delay_state != AtDelay, "Use delayed() when filling delay-slot");
+  _delay_state = NoDelay;
+#endif
+}
+
+inline void Assembler::emit_int32(int32_t x) {  // runs check_delay(), clears the tracked hazard (validation builds), then forwards x to AbstractAssembler
+  check_delay();
+#ifdef VALIDATE_PIPELINE
+  _hazard_state = NoHazard;
+#endif
+  AbstractAssembler::emit_int32(x);
+}
+
+inline void Assembler::emit_data(int32_t x) {  // no relocation: identical to emit_int32(x)
+  emit_int32(x);
+}
+
+inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {  // calls relocate(rtype) first, then emits x
+  relocate(rtype);
+  emit_int32(x);
+}
+
+inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {  // calls relocate(rspec) first, then emits x
+  relocate(rspec);
+  emit_int32(x);
+}
+
+// Integer add encoders: the second operand is either a register (rs2) or a 13-bit immediate; the 'cc' forms OR cc_bit_op3 into op3.
+inline void Assembler::add(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::add(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+inline void Assembler::addcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::addcc(Register s1, int simm13a, Register d) {
+
emit_int32(op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::addc(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(addc_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::addc(Register s1, int simm13a, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(addc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::addccc(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::addccc(Register s1, int simm13a, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::aes_eround01(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_eround23(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_dround01(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_dround23(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | 
op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_eround01_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_eround23_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_dround01_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_dround23_l(FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D)); +} +inline void Assembler::aes_kexpand1(FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D)); +} + +// 3-operand AES instructions + +inline void Assembler::aes_kexpand0(FloatRegister s1, FloatRegister s2, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D)); +} +inline void 
Assembler::aes_kexpand2(FloatRegister s1, FloatRegister s2, FloatRegister d) { + aes_only(); + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D)); +} + +inline void Assembler::bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt) { + avoid_pipeline_stall(); + cti(); + emit_data(op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); + induce_delay_slot(); +} +inline void Assembler::bpr(RCondition c, bool a, Predict p, Register s1, Label &L) { + // Note[+]: All assembly emit routines using the 'target()' branch back-patch + // resolver must call 'avoid_pipeline_stall()' prior to calling 'target()' + // (we must do so even though the call will be made, as here, in the above + // implementation of 'bpr()', invoked below). The reason is the assumption + // made in 'target()', where using the current PC as the address for back- + // patching prevents any additional code to be emitted _after_ the address + // has been set (implicitly) in order to refer to the correct instruction. 
+ avoid_pipeline_stall(); + bpr(c, a, p, s1, target(L)); +} + +inline void Assembler::fb(Condition c, bool a, address d, relocInfo::relocType rt) { + v9_dep(); + avoid_pipeline_stall(); + cti(); + emit_data(op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); + induce_delay_slot(); +} +inline void Assembler::fb(Condition c, bool a, Label &L) { + avoid_pipeline_stall(); + fb(c, a, target(L)); +} + +inline void Assembler::fbp(Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt) { + avoid_pipeline_stall(); + cti(); + emit_data(op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); + induce_delay_slot(); +} +inline void Assembler::fbp(Condition c, bool a, CC cc, Predict p, Label &L) { + avoid_pipeline_stall(); + fbp(c, a, cc, p, target(L)); +} + +inline void Assembler::br(Condition c, bool a, address d, relocInfo::relocType rt) { + v9_dep(); + avoid_pipeline_stall(); + cti(); + emit_data(op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); + induce_delay_slot(); +} +inline void Assembler::br(Condition c, bool a, Label &L) { + avoid_pipeline_stall(); + br(c, a, target(L)); +} + +inline void Assembler::bp(Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt) { + avoid_pipeline_stall(); + cti(); + emit_data(op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); + induce_delay_slot(); +} +inline void Assembler::bp(Condition c, bool a, CC cc, Predict p, Label &L) { + avoid_pipeline_stall(); + bp(c, a, cc, p, target(L)); +} + +// compare and branch +inline void Assembler::cbcond(Condition c, CC cc, Register s1, Register s2, Label &L) { + avoid_pipeline_stall(); + cti(); + emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | rs2(s2)); + 
induce_pc_hazard(); +} +inline void Assembler::cbcond(Condition c, CC cc, Register s1, int simm5, Label &L) { + avoid_pipeline_stall(); + cti(); + emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | immed(true) | simm(simm5, 5)); + induce_pc_hazard(); +} + +inline void Assembler::call(address d, relocInfo::relocType rt) { + avoid_pipeline_stall(); + cti(); + emit_data(op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); + induce_delay_slot(); + assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); +} +inline void Assembler::call(Label &L, relocInfo::relocType rt) { + avoid_pipeline_stall(); + call(target(L), rt); +} + +inline void Assembler::call(address d, RelocationHolder const &rspec) { + avoid_pipeline_stall(); + cti(); + emit_data(op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rspec); + induce_delay_slot(); + assert(rspec.type() != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); +} + +inline void Assembler::casa(Register s1, Register s2, Register d, int ia) { + emit_int32(op(ldst_op) | rd(d) | op3(casa_op3) | rs1(s1) | (ia == -1 ? immed(true) : imm_asi(ia)) | rs2(s2)); +} +inline void Assembler::casxa(Register s1, Register s2, Register d, int ia) { + emit_int32(op(ldst_op) | rd(d) | op3(casxa_op3) | rs1(s1) | (ia == -1 ? 
immed(true) : imm_asi(ia)) | rs2(s2)); +} + +inline void Assembler::udiv(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(udiv_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::udiv(Register s1, int simm13a, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(udiv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::sdiv(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(sdiv_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::sdiv(Register s1, int simm13a, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(sdiv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::udivcc(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::udivcc(Register s1, int simm13a, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::sdivcc(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::sdivcc(Register s1, int simm13a, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::done() { + cti(); + emit_int32(op(arith_op) | fcn(0) | op3(done_op3)); +} +inline void Assembler::retry() { + cti(); + emit_int32(op(arith_op) | fcn(1) | op3(retry_op3)); +} + +inline void Assembler::fadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x40 + w) | fs2(s2, w)); +} +inline void Assembler::fsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x44 + w) | fs2(s2, w)); +} + +inline void 
Assembler::fcmp(FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { + emit_int32(op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x50 + w) | fs2(s2, w)); +} +inline void Assembler::fcmpe(FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { + emit_int32(op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x54 + w) | fs2(s2, w)); +} + +inline void Assembler::ftox(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(fpop1_op3) | opf(0x80 + w) | fs2(s, w)); +} +inline void Assembler::ftoi(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(fpop1_op3) | opf(0xd0 + w) | fs2(s, w)); +} + +inline void Assembler::ftof(FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, dw) | op3(fpop1_op3) | opf(0xc0 + sw + dw*4) | fs2(s, sw)); +} + +inline void Assembler::fxtof(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x80 + w*4) | fs2(s, FloatRegisterImpl::D)); +} +inline void Assembler::fitof(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0xc0 + w*4) | fs2(s, FloatRegisterImpl::S)); +} + +inline void Assembler::fmov(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x00 + w) | fs2(s, w)); +} +inline void Assembler::fneg(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(s, w)); +} +inline void Assembler::fabs(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(s, w)); +} +inline void 
Assembler::fmul(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x48 + w) | fs2(s2, w)); +} +inline void Assembler::fmul(FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); +} +inline void Assembler::fdiv(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x4c + w) | fs2(s2, w)); +} + +inline void Assembler::fxor(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d) { + vis1_only(); + emit_int32(op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(0x6E - w) | fs2(s2, w)); +} + +inline void Assembler::fsqrt(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) { + emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); +} + +inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + fmaf_only(); + emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w)); +} +inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + fmaf_only(); + emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x4 + w) | fs2(s2, w)); +} + +inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) { + fmaf_only(); + emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0xc + w) | fs2(s2, w)); +} +inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister 
d) { + fmaf_only(); + emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x8 + w) | fs2(s2, w)); +} + +inline void Assembler::flush(Register s1, Register s2) { + emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::flush(Register s1, int simm13a) { + emit_data(op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::flushw() { + emit_int32(op(arith_op) | op3(flushw_op3)); +} + +inline void Assembler::illtrap(int const22a) { + emit_int32(op(branch_op) | u_field(const22a, 21, 0)); +} + +inline void Assembler::impdep1(int id1, int const19a) { + emit_int32(op(arith_op) | fcn(id1) | op3(impdep1_op3) | u_field(const19a, 18, 0)); +} +inline void Assembler::impdep2(int id1, int const19a) { + emit_int32(op(arith_op) | fcn(id1) | op3(impdep2_op3) | u_field(const19a, 18, 0)); +} + +inline void Assembler::jmpl(Register s1, Register s2, Register d) { + avoid_pipeline_stall(); + cti(); + emit_int32(op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); + induce_delay_slot(); +} +inline void Assembler::jmpl(Register s1, int simm13a, Register d, RelocationHolder const &rspec) { + avoid_pipeline_stall(); + cti(); + emit_data(op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); + induce_delay_slot(); +} + +inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { + emit_int32(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2)); +} +inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const &rspec) { + emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); +} + +inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) { + assert(d->is_even(), "not even"); + ldf(FloatRegisterImpl::D, s1, s2, d); +} +inline void Assembler::ldd(Register s1, int 
simm13a, FloatRegister d) { + assert(d->is_even(), "not even"); + ldf(FloatRegisterImpl::D, s1, simm13a, d); +} + +inline void Assembler::ldxfsr(Register s1, Register s2) { + emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::ldxfsr(Register s1, int simm13a) { + emit_data(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::ldfa(FloatRegisterImpl::Width w, Register s1, Register s2, int ia, FloatRegister d) { + emit_int32(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::ldfa(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d) { + emit_int32(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::ldsb(Register s1, Register s2, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::ldsb(Register s1, int simm13a, Register d) { + emit_data(op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::ldsh(Register s1, Register s2, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::ldsh(Register s1, int simm13a, Register d) { + emit_data(op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::ldsw(Register s1, Register s2, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::ldsw(Register s1, int simm13a, Register d) { + emit_data(op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::ldub(Register s1, Register s2, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::ldub(Register s1, int simm13a, Register d) { + 
emit_data(op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::lduh(Register s1, Register s2, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::lduh(Register s1, int simm13a, Register d) { + emit_data(op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::lduw(Register s1, Register s2, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::lduw(Register s1, int simm13a, Register d) { + emit_data(op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::ldx(Register s1, Register s2, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::ldx(Register s1, int simm13a, Register d) { + emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::ldsba(Register s1, int simm13a, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::ldsha(Register s1, Register s2, int ia, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::ldsha(Register s1, int simm13a, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::ldswa(Register s1, Register s2, int ia, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::ldswa(Register s1, int simm13a, Register d) { + 
emit_int32(op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::lduba(Register s1, Register s2, int ia, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::lduba(Register s1, int simm13a, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::lduha(Register s1, Register s2, int ia, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::lduha(Register s1, int simm13a, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::lduwa(Register s1, Register s2, int ia, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::lduwa(Register s1, int simm13a, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::ldxa(Register s1, Register s2, int ia, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); +} +inline void Assembler::ldxa(Register s1, int simm13a, Register d) { + emit_int32(op(ldst_op) | rd(d) | op3(ldx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} + +inline void Assembler::and3(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(and_op3) | rs1(s1) | rs2(s2)); +} +inline void Assembler::and3(Register s1, int simm13a, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(and_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); +} +inline void Assembler::andcc(Register s1, Register s2, Register d) { + emit_int32(op(arith_op) | rd(d) | op3(and_op3 | cc_bit_op3) | 
rs1(s1) | rs2(s2));
+}
+inline void Assembler::andcc(Register s1, int simm13a, Register d) {  // immediate forms: immed(true) plus simm(value, 13) replace rs2(s2)
+  emit_int32(op(arith_op) | rd(d) | op3(and_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::andn(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(andn_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::andn(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(andn_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::andncc(Register s1, Register s2, Register d) {  // *cc variants OR cc_bit_op3 into the op3 value
+  emit_int32(op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::andncc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::or3(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(or_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::or3(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(or_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::orcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(or_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::orcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(or_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::orn(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::orn(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::orncc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(orn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::orncc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(orn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::xor3(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xor_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::xor3(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xor_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::xorcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::xorcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::xnor(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xnor_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::xnor(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xnor_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::xnorcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::xnorcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// membar carries no rd(); rs1 is fixed to O7 and the mask is packed with u_field(mask, 6, 0).
+inline void Assembler::membar(Membar_mask_bits const7a) {
+  emit_int32(op(arith_op) | op3(membar_op3) | rs1(O7) | immed(true) | u_field(int(const7a), 6, 0));
+}
+// FP move conditional on Condition c; cca and floatCC are encoded through opf_cc().
+inline void Assembler::fmov(FloatRegisterImpl::Width w, Condition c, bool floatCC, CC cca, FloatRegister s2, FloatRegister d) {
+  emit_int32(op(arith_op) | fd(d, w) | op3(fpop2_op3) | cond_mov(c) | opf_cc(cca, floatCC) | opf_low6(w) | fs2(s2, w));
+}
+// FP move conditional on RCondition c with integer register s1; the width enters as opf_low5(4 + w).
+inline void Assembler::fmov(FloatRegisterImpl::Width w, RCondition c, Register s1, FloatRegister s2, FloatRegister d) {
+  emit_int32(op(arith_op) | fd(d, w) | op3(fpop2_op3) | rs1(s1) | rcond(c) | opf_low5(4 + w) | fs2(s2, w));
+}
+// movcc: register source form, then the form taking an 11-bit signed immediate.
+inline void Assembler::movcc(Condition c, bool floatCC, CC cca, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | rs2(s2));
+}
+inline void Assembler::movcc(Condition c, bool floatCC, CC cca, int simm11a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | immed(true) | simm(simm11a, 11));
+}
+// movr: conditional on RCondition c with register s1; the immediate form carries a 10-bit signed value.
+inline void Assembler::movr(RCondition c, Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | rs2(s2));
+}
+inline void Assembler::movr(RCondition c, Register s1, int simm10a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | immed(true) | simm(simm10a, 10));
+}
+// mulx / sdivx / udivx: register and simm13 forms.
+inline void Assembler::mulx(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(mulx_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::mulx(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(mulx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::sdivx(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::sdivx(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::udivx(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::udivx(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// umul / smul and their *cc variants (cc_bit_op3 ORed into op3).
+inline void Assembler::umul(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(umul_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::umul(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(umul_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::smul(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(smul_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::smul(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(smul_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::umulcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::umulcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::smulcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::smulcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// nop is emitted as op(branch_op) | op2(sethi_op2) with no rd or immediate bits ORed in.
+inline void Assembler::nop() {
+  emit_int32(op(branch_op) | op2(sethi_op2));
+}
+// Same base word as nop() with the constant 0x3f0 ORed in.
+inline void Assembler::sw_count() {
+  emit_int32(op(branch_op) | op2(sethi_op2) | 0x3f0);
+}
+// popc takes no rs1 operand; the source is rs2(s) or a simm13.
+inline void Assembler::popc(Register s, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(popc_op3) | rs2(s));
+}
+inline void Assembler::popc(int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(popc_op3) | immed(true) | simm(simm13a, 13));
+}
+// prefetch: the PrefetchFcn is encoded with fcn(f); note the immediate form goes through emit_data, not emit_int32.
+inline void Assembler::prefetch(Register s1, Register s2, PrefetchFcn f) {
+  emit_int32(op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::prefetch(Register s1, int simm13a, PrefetchFcn f) {
+  emit_data(op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// prefetcha: alt_bit_op3 variant of prefetch; only the register form takes an ASI (imm_asi(ia)).
+inline void Assembler::prefetcha(Register s1, Register s2, int ia, PrefetchFcn f) {
+  emit_int32(op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::prefetcha(Register s1, int simm13a, PrefetchFcn f) {
+  emit_int32(op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// The rd* emitters share rdreg_op3 and differ only in the u_field(N, 18, 14) value; rdy also calls v9_dep().
+inline void Assembler::rdy(Register d) {
+  v9_dep();
+  emit_int32(op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(0, 18, 14));
+}
+inline void Assembler::rdccr(Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(2, 18, 14));
+}
+inline void Assembler::rdasi(Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(3, 18, 14));
+}
+inline void Assembler::rdtick(Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(4, 18, 14));
+}
+inline void Assembler::rdpc(Register d) {  // the only rd* emitter wrapped in assembler bookkeeping calls
+  avoid_pipeline_stall();
+  cti();
+  emit_int32(op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(5, 18, 14));
+  induce_pc_hazard();
+}
+inline void Assembler::rdfprs(Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(6, 18, 14));
+}
+// rett: cti() is called before the instruction is emitted and induce_delay_slot() after it.
+inline void Assembler::rett(Register s1, Register s2) {
+  cti();
+  emit_int32(op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2));
+  induce_delay_slot();
+}
+inline void Assembler::rett(Register s1, int simm13a, relocInfo::relocType rt) {
+  cti();
+  emit_data(op(arith_op) | op3(rett_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rt);  // rt is forwarded to emit_data
+  induce_delay_slot();
+}
+// save: the immediate form additionally asserts a minimum frame size (see below).
+inline void Assembler::save(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::save(Register s1, int simm13a, Register d) {
+  // make sure frame is at least large enough for the register save area
+  assert(-simm13a >= 16 * wordSize, "frame too small");  // simm13a is negated before the comparison
+  emit_int32(op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+inline void Assembler::restore(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::restore(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+// pp 216
+// saved() and restored() share saved_op3 and differ only in fcn(0) versus fcn(1).
+inline void Assembler::saved() {
+  emit_int32(op(arith_op) | fcn(0) | op3(saved_op3));
+}
+inline void Assembler::restored() {
+  emit_int32(op(arith_op) | fcn(1) | op3(saved_op3));
+}
+// sethi goes through emit_data so that the supplied RelocationHolder is passed along with the word.
+inline void Assembler::sethi(int imm22a, Register d, RelocationHolder const &rspec) {
+  emit_data(op(branch_op) | rd(d) | op2(sethi_op2) | hi22(imm22a), rspec);
+}
+// sll / srl / sra use sx(0); their immediate forms pack the count with u_field(imm5a, 4, 0).
+inline void Assembler::sll(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | rs2(s2));
+}
+inline void Assembler::sll(Register s1, int imm5a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0));
+}
+inline void Assembler::srl(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | rs2(s2));
+}
+inline void Assembler::srl(Register s1, int imm5a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0));
+}
+inline void Assembler::sra(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | rs2(s2));
+}
+inline void Assembler::sra(Register s1, int imm5a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0));
+}
+// The *x shifts reuse the same op3 values with sx(1); their immediates use u_field(imm6a, 5, 0).
+inline void Assembler::sllx(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | rs2(s2));
+}
+inline void Assembler::sllx(Register s1, int imm6a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0));
+}
+inline void Assembler::srlx(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | rs2(s2));
+}
+inline void Assembler::srlx(Register s1, int imm6a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0));
+}
+inline void Assembler::srax(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | rs2(s2));
+}
+inline void Assembler::srax(Register s1, int imm6a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0));
+}
+// sir: fixed fcn(15), immediate-only.
+inline void Assembler::sir(int simm13a) {
+  emit_int32(op(arith_op) | fcn(15) | op3(sir_op3) | immed(true) | simm(simm13a, 13));
+}
+
+// pp 221
+// stbar shares membar_op3 with membar() but sets u_field(15, 18, 14) and no immediate.
+inline void Assembler::stbar() {
+  emit_int32(op(arith_op) | op3(membar_op3) | u_field(15, 18, 14));
+}
+
+// pp 222
+// stf: the op3 value is derived from the width via alt_op3(stf_op3, w); the immediate form uses emit_data.
+inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2) {
+  emit_int32(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a) {
+  emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// std (FloatRegister overloads): assert an even register, then delegate to stf with width D.
+inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
+  assert(d->is_even(), "not even");
+  stf(FloatRegisterImpl::D, d, s1, s2);
+}
+inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
+  assert(d->is_even(), "not even");
+  stf(FloatRegisterImpl::D, d, s1, simm13a);
+}
+// stxfsr: uses stfsr_op3 with the rd field fixed to G1.
+inline void Assembler::stxfsr(Register s1, Register s2) {
+  emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::stxfsr(Register s1, int simm13a) {
+  emit_data(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// stfa: alt_bit_op3 variant of stf; only the register form takes an ASI (imm_asi(ia)).
+inline void Assembler::stfa(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2, int ia) {
+  emit_int32(op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::stfa(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a) {
+  emit_int32(op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+// p 226
+// Integer stores stb / sth / stw / stx: register forms use emit_int32, immediate forms use emit_data.
+inline void Assembler::stb(Register d, Register s1, Register s2) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::stb(Register d, Register s1, int simm13a) {
+  emit_data(op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::sth(Register d, Register s1, Register s2) {
+  emit_int32(op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::sth(Register d, Register s1, int simm13a) {
+  emit_data(op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::stw(Register d, Register s1, Register s2) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::stw(Register d, Register s1, int simm13a) {
+  emit_data(op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+
+inline void Assembler::stx(Register d, Register s1, Register s2) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::stx(Register d, Register s1, int simm13a) {
+  emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+// st*a: alt_bit_op3 variants; register forms add imm_asi(ia), and both forms use emit_int32.
+inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::stba(Register d, Register s1, int simm13a) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::stha(Register d, Register s1, Register s2, int ia) {
+  emit_int32(op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::stha(Register d, Register s1, int simm13a) {
+  emit_int32(op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::stwa(Register d, Register s1, Register s2, int ia) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::stwa(Register d, Register s1, int simm13a) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::stxa(Register d, Register s1, Register s2, int ia) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::stxa(Register d, Register s1, int simm13a) {
+  emit_int32(op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::stda(Register d, Register s1, Register s2, int ia) {
+  emit_int32(op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::stda(Register d, Register s1, int simm13a) {
+  emit_int32(op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+// pp 230
+// sub / subcc / subc / subccc: subccc is subc_op3 with cc_bit_op3 ORed in.
+inline void Assembler::sub(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sub_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::sub(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sub_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+inline void Assembler::subcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::subcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::subc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(subc_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::subc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(subc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::subccc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::subccc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+// pp 231
+// swap / swapa: every form calls v9_dep() before emitting.
+inline void Assembler::swap(Register s1, Register s2, Register d) {
+  v9_dep();
+  emit_int32(op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::swap(Register s1, int simm13a, Register d) {
+  v9_dep();
+  emit_data(op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+inline void Assembler::swapa(Register s1, Register s2, int ia, Register d) {
+  v9_dep();
+  emit_int32(op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+inline void Assembler::swapa(Register s1, int simm13a, Register d) {
+  v9_dep();
+  emit_int32(op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+// pp 234, note op in book is wrong, see pp 268
+
+inline void Assembler::taddcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(taddcc_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::taddcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(taddcc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+// pp 235
+
+inline void Assembler::tsubcc(Register s1, Register s2, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(tsubcc_op3) | rs1(s1) | rs2(s2));
+}
+inline void Assembler::tsubcc(Register s1, int simm13a, Register d) {
+  emit_int32(op(arith_op) | rd(d) | op3(tsubcc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
+}
+
+// pp 237
+// trap: cond(c) is encoded instead of rd(); the immediate form packs trapa with u_field(trapa, 6, 0).
+inline void Assembler::trap(Condition c, CC cc, Register s1, Register s2) {
+  emit_int32(op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | rs2(s2));
+}
+inline void Assembler::trap(Condition c, CC cc, Register s1, int trapa) {
+  emit_int32(op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | immed(true) | u_field(trapa, 6, 0));
+}
+// simple uncond. trap (delegates with always, icc and G0)
+inline void Assembler::trap(int trapa) {
+  trap(always, icc, G0, trapa);
+}
+// The wr* emitters pass their register through rs1() and differ only in the u_field(N, 29, 25) value.
+inline void Assembler::wry(Register d) {
+  v9_dep();
+  emit_int32(op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(0, 29, 25));
+}
+inline void Assembler::wrccr(Register s) {
+  emit_int32(op(arith_op) | rs1(s) | op3(wrreg_op3) | u_field(2, 29, 25));
+}
+inline void Assembler::wrccr(Register s, int simm13a) {
+  emit_int32(op(arith_op) | rs1(s) | op3(wrreg_op3) | u_field(2, 29, 25) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::wrasi(Register d) {
+  emit_int32(op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25));
+}
+// wrasi(d, imm) stores (d xor imm) to asi
+inline void Assembler::wrasi(Register d, int simm13a) {
+  emit_int32(op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25) | immed(true) | simm(simm13a, 13));
+}
+inline void Assembler::wrfprs(Register d) {
+  emit_int32(op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25));
+}
+// The emitters from here to stpartialf() each call vis1_only() before emitting.
+inline void Assembler::alignaddr(Register s1, Register s2, Register d) {
+  vis1_only();
+  emit_int32(op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2));
+}
+
+inline void Assembler::faligndata(FloatRegister s1, FloatRegister s2, FloatRegister d) {
+  vis1_only();
+  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D));
+}
+// fzero / fsrc2 / fnot1 compute their opf value by subtracting the width w from a base constant.
+inline void Assembler::fzero(FloatRegisterImpl::Width w, FloatRegister d) {
+  vis1_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(fzero_op3) | opf(0x62 - w));
+}
+
+inline void Assembler::fsrc2(FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d) {
+  vis1_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w));
+}
+
+inline void Assembler::fnot1(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister d) {
+  vis1_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(fnot_op3) | fs1(s1, w) | opf(0x6C - w));
+}
+// fpmerge: sources are encoded with width S, the destination with width D; op3/opf are literal constants here.
+inline void Assembler::fpmerge(FloatRegister s1, FloatRegister s2, FloatRegister d) {
+  vis1_only();
+  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(0x36) | fs1(s1, FloatRegisterImpl::S) | opf(0x4b) | fs2(s2, FloatRegisterImpl::S));
+}
+
+inline void Assembler::stpartialf(Register s1, Register s2, FloatRegister d, int ia) {
+  vis1_only();
+  emit_int32(op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
+}
+
+// VIS2 instructions
+// (each emitter calls vis2_only() before emitting)
+inline void Assembler::edge8n(Register s1, Register s2, Register d) {
+  vis2_only();
+  emit_int32(op(arith_op) | rd(d) | op3(edge_op3) | rs1(s1) | opf(edge8n_opf) | rs2(s2));
+}
+
+inline void Assembler::bmask(Register s1, Register s2, Register d) {
+  vis2_only();
+  emit_int32(op(arith_op) | rd(d) | op3(bmask_op3) | rs1(s1) | opf(bmask_opf) | rs2(s2));
+}
+inline void Assembler::bshuffle(FloatRegister s1, FloatRegister s2, FloatRegister d) {
+  vis2_only();
+  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(bshuffle_op3) | fs1(s1, FloatRegisterImpl::D) | opf(bshuffle_opf) | fs2(s2, FloatRegisterImpl::D));
+}
+
+// VIS3 instructions
+// (each emitter calls vis3_only() before emitting)
+inline void Assembler::addxc(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxc_opf) | rs2(s2));
+}
+inline void Assembler::addxccc(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxccc_opf) | rs2(s2));
+}
+// mov{stosw,stouw,dtox}: FloatRegister source (via fs2) to integer Register destination (via rd).
+inline void Assembler::movstosw(FloatRegister s, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
+}
+inline void Assembler::movstouw(FloatRegister s, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstouw_opf) | fs2(s, FloatRegisterImpl::S));
+}
+inline void Assembler::movdtox(FloatRegister s, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mdtox_opf) | fs2(s, FloatRegisterImpl::D));
+}
+// mov{wtos,xtod}: integer Register source (via rs2) to FloatRegister destination (via fd); same mftoi_op3.
+inline void Assembler::movwtos(Register s, FloatRegister d) {
+  vis3_only();
+  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s));
+}
+inline void Assembler::movxtod(Register s, FloatRegister d) {
+  vis3_only();
+  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s));
+}
+// xmulx and xmulxhi share xmulx_op3 and differ in opf; umulxhi uses umulx_op3.
+inline void Assembler::xmulx(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2));
+}
+inline void Assembler::xmulxhi(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
+}
+inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(umulx_op3) | rs1(s1) | opf(umulxhi_opf) | rs2(s2));
+}
+
+// Crypto SHA instructions
+// (no register operands are encoded; each calls its own sha*_only() check first)
+inline void Assembler::sha1() {
+  sha1_only();
+  emit_int32(op(arith_op) | op3(sha_op3) | opf(sha1_opf));
+}
+inline void Assembler::sha256() {
+  sha256_only();
+  emit_int32(op(arith_op) | op3(sha_op3) | opf(sha256_opf));
+}
+inline void Assembler::sha512() {
+  sha512_only();
+  emit_int32(op(arith_op) | op3(sha_op3) | opf(sha512_opf));
+}
+
+// CRC32C instruction
+// (calls crc32c_only() first; all three FP operands are encoded with width D)
+inline void Assembler::crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d) {
+  crc32c_only();
+  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
+}
+
+// MPMUL instruction
+// (calls mpmul_only() first; rd and rs1 are passed as 0 and the operand goes in via uimm(uimm5, 5))
+inline void Assembler::mpmul(int uimm5) {
+  mpmul_only();
+  emit_int32(op(arith_op) | rd(0) | op3(mpmul_op3) | rs1(0) | opf(mpmul_opf) | uimm(uimm5, 5));
+}
+
+#endif // CPU_SPARC_ASSEMBLER_SPARC_INLINE_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/bytes_sparc.hpp b/src/hotspot/cpu/sparc/bytes_sparc.hpp
--- a/src/hotspot/cpu/sparc/bytes_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/bytes_sparc.hpp 2023-04-16 11:42:11.056086599 +0000
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_BYTES_SPARC_HPP
+#define CPU_SPARC_BYTES_SPARC_HPP
+
+#include "memory/allocation.hpp"
+// Bytes: static helpers that read and write u2/u4/u8 values at addresses of any alignment.
+class Bytes: AllStatic {
+ public:
+  // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
+  // Sparc needs to check for alignment.
+  // Each accessor below inspects the low bits of p and uses the widest access those bits allow.
+  // can I count on address always being a pointer to an unsigned char? Yes
+  // (the byte-wise fallbacks index p[i] directly and rely on that)
+  // Thus (native order == Java order, see get_Java_*/put_Java_* below), a swap between native and Java ordering is always a no-op:
+  static inline u2 swap_u2(u2 x) { return x; }
+  static inline u4 swap_u4(u4 x) { return x; }
+  static inline u8 swap_u8(u8 x) { return x; }
+  // Reads a u2 at p: a direct load when p is even, otherwise composed from two bytes.
+  static inline u2 get_native_u2(address p){
+    return (intptr_t(p) & 1) == 0
+             ? *(u2*)p              // even address: single 2-byte load
+             : ( u2(p[0]) << 8 )    // odd address: p[0] supplies the high-order byte
+             | ( u2(p[1]) );
+  }
+  // Reads a u4 at p, dispatching on the low two address bits.
+  static inline u4 get_native_u4(address p) {
+    switch (intptr_t(p) & 3) {
+     case 0: return *(u4*)p;                        // low bits 00: single 4-byte load
+
+     case 2: return ( u4( ((u2*)p)[0] ) << 16 )     // low bits 10: two 2-byte loads, first is the high half
+                  | ( u4( ((u2*)p)[1] ) );
+
+    default: return ( u4(p[0]) << 24 )              // odd addresses: four single-byte loads
+                  | ( u4(p[1]) << 16 )
+                  | ( u4(p[2]) << 8 )
+                  | u4(p[3]);
+    }
+  }
+  // Reads a u8 at p, dispatching on the low three address bits.
+  static inline u8 get_native_u8(address p) {
+    switch (intptr_t(p) & 7) {
+      case 0: return *(u8*)p;                       // low bits 000: single 8-byte load
+
+      case 4: return ( u8( ((u4*)p)[0] ) << 32 )    // low bits 100: two 4-byte loads
+                   | ( u8( ((u4*)p)[1] ) );
+
+      case 2: return ( u8( ((u2*)p)[0] ) << 48 )    // low bits 010: four 2-byte loads
+                   | ( u8( ((u2*)p)[1] ) << 32 )
+                   | ( u8( ((u2*)p)[2] ) << 16 )
+                   | ( u8( ((u2*)p)[3] ) );
+
+     default: return ( u8(p[0]) << 56 )             // everything else (odd addresses, and low bits 110): eight byte loads
+                   | ( u8(p[1]) << 48 )
+                   | ( u8(p[2]) << 40 )
+                   | ( u8(p[3]) << 32 )
+                   | ( u8(p[4]) << 24 )
+                   | ( u8(p[5]) << 16 )
+                   | ( u8(p[6]) << 8 )
+                   | u8(p[7]);
+    }
+  }
+
+  // The put_native_* writers mirror the readers above: same alignment dispatch, and the
+  // most significant part of x is stored at the lowest address.
+  static inline void put_native_u2(address p, u2 x) {
+    if ( (intptr_t(p) & 1) == 0 ) *(u2*)p = x;      // even address: single 2-byte store
+    else {
+      p[0] = x >> 8;                                // high-order byte first
+      p[1] = x;
+    }
+  }
+  // Writes a u4 at p, dispatching on the low two address bits.
+  static inline void put_native_u4(address p, u4 x) {
+    switch ( intptr_t(p) & 3 ) {
+    case 0: *(u4*)p = x;                            // low bits 00: single 4-byte store
+      break;
+
+    case 2: ((u2*)p)[0] = x >> 16;                  // low bits 10: two 2-byte stores
+      ((u2*)p)[1] = x;
+      break;
+
+    default: ((u1*)p)[0] = x >> 24;                 // odd addresses: four stores through u1*
+      ((u1*)p)[1] = x >> 16;
+      ((u1*)p)[2] = x >> 8;
+      ((u1*)p)[3] = x;
+      break;
+    }
+  }
+  // Writes a u8 at p, dispatching on the low three address bits.
+  static inline void put_native_u8(address p, u8 x) {
+    switch ( intptr_t(p) & 7 ) {
+    case 0: *(u8*)p = x;                            // low bits 000: single 8-byte store
+      break;
+
+    case 4: ((u4*)p)[0] = x >> 32;                  // low bits 100: two 4-byte stores
+      ((u4*)p)[1] = x;
+      break;
+
+    case 2: ((u2*)p)[0] = x >> 48;                  // low bits 010: four 2-byte stores
+      ((u2*)p)[1] = x >> 32;
+      ((u2*)p)[2] = x >> 16;
+      ((u2*)p)[3] = x;
+      break;
+
+    default: ((u1*)p)[0] = x >> 56;                 // everything else (odd addresses, and low bits 110): eight stores through u1*
+      ((u1*)p)[1] = x >> 48;
+      ((u1*)p)[2] = x >> 40;
+      ((u1*)p)[3] = x >> 32;
+      ((u1*)p)[4] = x >> 24;
+      ((u1*)p)[5] = x >> 16;
+      ((u1*)p)[6] = x >> 8;
+      ((u1*)p)[7] = x;                              // last label of the switch, so there is no break
+    }
+  }
+
+
+  // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
+  // (no byte-order reversal is needed since SPARC CPUs are big-endian oriented)
+  static inline u2 get_Java_u2(address p) { return get_native_u2(p); }
+  static inline u4 get_Java_u4(address p) { return get_native_u4(p); }
+  static inline u8 get_Java_u8(address p) { return get_native_u8(p); }
+
+  static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, x); }
+  static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, x); }
+  static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, x); }
+};
+
+//Reconciliation History
+// 1.7 98/02/24 10:18:41 bytes_i486.hpp
+// 1.10 98/04/08 18:47:57 bytes_i486.hpp
+// 1.13 98/07/15 17:10:03 bytes_i486.hpp
+// 1.14 98/08/13 10:38:23 bytes_i486.hpp
+// 1.15 98/10/05 16:30:21 bytes_i486.hpp
+// 1.17 99/06/22 16:37:35 bytes_i486.hpp
+//End
+
+#endif // CPU_SPARC_BYTES_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/c1_CodeStubs_sparc.cpp b/src/hotspot/cpu/sparc/c1_CodeStubs_sparc.cpp
--- a/src/hotspot/cpu/sparc/c1_CodeStubs_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/c1_CodeStubs_sparc.cpp 2023-04-16 11:42:11.056318017 +0000
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_sparc.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_sparc.inline.hpp"
+// Shorthand used by every stub below: "__ foo(...)" expands to "ce->masm()-> foo(...)".
+#define __ ce->masm()->
+// This stub emits no code here; reaching it is treated as a bug (ShouldNotReachHere).
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
+  ShouldNotReachHere();
+}
+// Range check with a known array: _throw_index_out_of_bounds_exception is false; info is copied into a new CodeEmitInfo.
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
+  : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+// Range check without an array: _array is NULL and _throw_index_out_of_bounds_exception is true.
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index)
+  : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+// Emits the out-of-line path: either the predicate-failed (deoptimize) call, or index in G4 (plus array in G5) and a throwing runtime call.
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  if (_info->deoptimize_on_exception()) {
+    address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+    __ call(a, relocInfo::runtime_call_type);
+    __ delayed()->nop();
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ should_not_reach_here());
+    return;  // nothing else is emitted for the deoptimizing variant
+  }
+
+  if (_index->is_register()) {
+    __ mov(_index->as_register(), G4);  // index held in a register
+  } else {
+    __ set(_index->as_jint(), G4);  // index is a constant
+  }
+  if (_throw_index_out_of_bounds_exception) {
+    __ call(Runtime1::entry_for(Runtime1::throw_index_exception_id), relocInfo::runtime_call_type);
+  } else {
+    __ mov(_array->as_pointer_register(), G5);  // the array is passed only to the range-check-failed entry
+    __ call(Runtime1::entry_for(Runtime1::throw_range_check_failed_id), relocInfo::runtime_call_type);
+  }
+  __ delayed()->nop();  // shared by whichever call was emitted above
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+// Keeps a private copy of the emit info (no NULL assert here, unlike RangeCheckStub).
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+  _info = new CodeEmitInfo(info);
+}
+// Emits a call to the predicate_failed_trap runtime entry; nothing is emitted to return to the main code.
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  __ call(a, relocInfo::runtime_call_type);
+  __ delayed()->nop();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+// Passes _bci in G4 and the method metadata in G5 to the counter_overflow entry, then branches to _continuation.
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ set(_bci, G4);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  __ set_metadata_constant(m, G5);
+  __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
+  __ delayed()->nop();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+
+  __ br(Assembler::always, true, Assembler::pt, _continuation);
+  __ delayed()->nop();
+}
+
+// Registers (_offset -> current code offset) in the implicit exception table unless _offset is -1, then calls throw_div0_exception.
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) {
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type);
+  __ delayed()->nop();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+#ifdef ASSERT
+  __ should_not_reach_here();
+#endif
+}
+
+// Unconditionally registers (_offset -> current code offset), then calls either the deoptimizing or the NPE-throwing entry.
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  address a;
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+    a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  } else {
+    a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+  }
+
+  ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  __ bind(_entry);
+  __ call(a, relocInfo::runtime_call_type);
+  __ delayed()->nop();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+#ifdef ASSERT
+  __ should_not_reach_here();
+#endif
+}
+
+
+// Implementation of SimpleExceptionStub
+// Note: %g1 and %g3 are already in use
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type);
+
+  if (_obj->is_valid()) {
+    __ delayed()->mov(_obj->as_register(), G4); // _obj contains the optional argument to the stub
+  } else {
+    __ delayed()->mov(G0, G4);  // no argument: G0 is moved into G4 instead
+  }
+  ce->add_call_info_here(_info);  // note: no verify_oop_map() call in this stub
+#ifdef ASSERT
+  __ should_not_reach_here();
+#endif
+}
+
+
+// Implementation of NewInstanceStub
+// The constructor records its operands and asserts that stub_id is one of the three new-instance ids.
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id ||
+         stub_id == Runtime1::fast_new_instance_id ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id = stub_id;
+}
+
+// Calls the selected runtime entry with the klass in G5, then branches to _continuation while moving O0 into the result register.
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type);
+  __ delayed()->mov_or_nop(_klass_reg->as_register(), G5);
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->mov_or_nop(O0, _result->as_register());
+}
+
+
+// Implementation of NewTypeArrayStub
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+// Length goes to G4 and klass to G5 for the new_type_array entry; O0 is moved into the result register on the way back.
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  __ mov(_length->as_register(), G4);
+  __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type);
+  __ delayed()->mov_or_nop(_klass_reg->as_register(), G5);
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->mov_or_nop(O0, _result->as_register());
+}
+
+
+// Implementation of NewObjectArrayStub
+// (same shape as NewTypeArrayStub; only the runtime entry id differs)
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+// Length goes to G4 and klass to G5 for the new_object_array entry; O0 is moved into the result register on the way back.
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  __ mov(_length->as_register(), G4);
+  __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type);
+  __ delayed()->mov_or_nop(_klass_reg->as_register(), G5);
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->mov_or_nop(O0, _result->as_register());
+}
+
+
+// Implementation of MonitorAccessStubs
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+  : MonitorAccessStub(obj_reg, lock_reg) {
+  _info = new CodeEmitInfo(info);
+}
+
+// Object goes to G4 and lock to G5; the runtime entry is picked by whether the compilation has FPU code.
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ mov(_obj_reg->as_register(), G4);
+  if (ce->compilation()->has_fpu_code()) {
+    __ call(Runtime1::entry_for(Runtime1::monitorenter_id), relocInfo::runtime_call_type);
+  } else {
+    __ call(Runtime1::entry_for(Runtime1::monitorenter_nofpu_id), relocInfo::runtime_call_type);
+  }
+  __ delayed()->mov_or_nop(_lock_reg->as_register(), G5);
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ br(Assembler::always, true, Assembler::pt, _continuation);
+  __ delayed()->nop();
+}
+
+// Lock goes to G4 (after computing its address when _compute_lock is set); no call info or oop-map check is recorded here.
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_compute_lock) {
+    ce->monitor_address(_monitor_ix, _lock_reg);
+  }
+  if (ce->compilation()->has_fpu_code()) {
+    __ call(Runtime1::entry_for(Runtime1::monitorexit_id), relocInfo::runtime_call_type);
+  } else {
+    __ call(Runtime1::entry_for(Runtime1::monitorexit_nofpu_id), relocInfo::runtime_call_type);
+  }
+
+  __ delayed()->mov_or_nop(_lock_reg->as_register(), G4);
+  __ br(Assembler::always, true, Assembler::pt, _continuation);
+  __ delayed()->nop();
+}
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes)
+// - Replace original code with a call to the stub
+// At Runtime:
+// - call to stub, jump to runtime
+// - in runtime: preserve all registers (especially objects, i.e., source and destination object)
+// - in runtime: after initializing class, restore original code, reexecute instruction
+// _patch_info_offset is initialized to the negated size of a NativeGeneralJump.
+int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;
+
+void PatchingStub::align_patch_site(MacroAssembler* ) {
+  // patch sites on sparc are always properly aligned.
+} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + // copy original code here + assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, + "not enough room for call"); + assert((_bytes_to_copy & 0x3) == 0, "must copy a multiple of four bytes"); + + Label call_patch; + + int being_initialized_entry = __ offset(); + + if (_id == load_klass_id) { + // produce a copy of the load klass instruction for use by the being initialized case +#ifdef ASSERT + address start = __ pc(); +#endif + AddressLiteral addrlit(NULL, metadata_Relocation::spec(_index)); + __ patchable_set(addrlit, _obj); + +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + address ptr = (address)(_pc_start + i); + int a_byte = (*ptr) & 0xFF; + assert(a_byte == *start++, "should be the same code"); + } +#endif + } else if (_id == load_mirror_id || _id == load_appendix_id) { + // produce a copy of the load mirror instruction for use by the being initialized case +#ifdef ASSERT + address start = __ pc(); +#endif + AddressLiteral addrlit(NULL, oop_Relocation::spec(_index)); + __ patchable_set(addrlit, _obj); + +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + address ptr = (address)(_pc_start + i); + int a_byte = (*ptr) & 0xFF; + assert(a_byte == *start++, "should be the same code"); + } +#endif + } else { + // make a copy the code which is going to be patched. + for (int i = 0; i < _bytes_to_copy; i++) { + address ptr = (address)(_pc_start + i); + int a_byte = (*ptr) & 0xFF; + __ emit_int8 (a_byte); + } + } + + address end_of_patch = __ pc(); + int bytes_to_skip = 0; + if (_id == load_mirror_id) { + int offset = __ offset(); + if (CommentedAssembly) { + __ block_comment(" being_initialized check"); + } + + // static field accesses have special semantics while the class + // initializer is being run so we emit a test which can be used to + // check that this code is being executed by the initializing + // thread. 
+ assert(_obj != noreg, "must be a valid register"); + assert(_index >= 0, "must have oop index"); + __ ld_ptr(_obj, java_lang_Class::klass_offset(), G3); + __ ld_ptr(G3, in_bytes(InstanceKlass::init_thread_offset()), G3); + __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch); + + // load_klass patches may execute the patched code before it's + // copied back into place so we need to jump back into the main + // code of the nmethod to continue execution. + __ br(Assembler::always, false, Assembler::pt, _patch_site_continuation); + __ delayed()->nop(); + + // make sure this extra code gets skipped + bytes_to_skip += __ offset() - offset; + } + + // Now emit the patch record telling the runtime how to find the + // pieces of the patch. We only need 3 bytes but it has to be + // aligned as an instruction so emit 4 bytes. + int sizeof_patch_record = 4; + bytes_to_skip += sizeof_patch_record; + + // emit the offsets needed to find the code to patch + int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record; + + // Emit the patch record. 
We need to emit a full word, so emit an extra empty byte + __ emit_int8(0); + __ emit_int8(being_initialized_entry_offset); + __ emit_int8(bytes_to_skip); + __ emit_int8(_bytes_to_copy); + address patch_info_pc = __ pc(); + assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info"); + + address entry = __ pc(); + NativeGeneralJump::insert_unconditional((address)_pc_start, entry); + address target = NULL; + relocInfo::relocType reloc_type = relocInfo::none; + switch (_id) { + case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break; + case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break; + case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break; + case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break; + default: ShouldNotReachHere(); + } + __ bind(call_patch); + + if (CommentedAssembly) { + __ block_comment("patch entry point"); + } + __ call(target, relocInfo::runtime_call_type); + __ delayed()->nop(); + assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change"); + ce->add_call_info_here(_info); + __ br(Assembler::always, false, Assembler::pt, _patch_site_entry); + __ delayed()->nop(); + if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) { + CodeSection* cs = __ code_section(); + address pc = (address)_pc_start; + RelocIterator iter(cs, pc, pc + 1); + relocInfo::change_reloc_info_for_address(&iter, (address) pc, reloc_type, relocInfo::none); + + pc = (address)(_pc_start + NativeMovConstReg::add_offset); + RelocIterator iter2(cs, pc, pc+1); + relocInfo::change_reloc_info_for_address(&iter2, (address) pc, reloc_type, relocInfo::none); + } + +} + + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + __ set(_trap_request, G4); + __ 
call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); + __ delayed()->nop(); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + //---------------slow case: call to native----------------- + __ bind(_entry); + __ mov(src()->as_register(), O0); + __ mov(src_pos()->as_register(), O1); + __ mov(dst()->as_register(), O2); + __ mov(dst_pos()->as_register(), O3); + __ mov(length()->as_register(), O4); + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + + __ call(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type); + __ delayed()->nop(); + ce->add_call_info_here(info()); + ce->verify_oop_map(info()); + +#ifndef PRODUCT + __ set((intptr_t)&Runtime1::_arraycopy_slowcase_cnt, O0); + __ ld(O0, 0, O1); + __ inc(O1); + __ st(O1, 0, O0); +#endif + + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); +} + +#undef __ diff -ur --new-file a/src/hotspot/cpu/sparc/c1_Defs_sparc.hpp b/src/hotspot/cpu/sparc/c1_Defs_sparc.hpp --- a/src/hotspot/cpu/sparc/c1_Defs_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_Defs_sparc.hpp 2023-04-16 11:42:11.056437336 +0000 @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_C1_DEFS_SPARC_HPP +#define CPU_SPARC_C1_DEFS_SPARC_HPP + +// native word offsets from memory address (big endian) +enum { + pd_lo_word_offset_in_bytes = BytesPerInt, + pd_hi_word_offset_in_bytes = 0 +}; + + +// explicit rounding operations are not required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + + +// registers +enum { + pd_nof_cpu_regs_frame_map = 32, // number of registers used during code emission + pd_nof_caller_save_cpu_regs_frame_map = 10, // number of cpu registers killed by calls + pd_nof_cpu_regs_reg_alloc = 20, // number of registers that are visible to register allocator + pd_nof_cpu_regs_linearscan = 32,// number of registers visible linear scan + pd_first_cpu_reg = 0, + pd_last_cpu_reg = 31, + pd_last_allocatable_cpu_reg = 19, + pd_first_callee_saved_reg = 0, + pd_last_callee_saved_reg = 13, + + pd_nof_fpu_regs_frame_map = 32, // number of registers used during code emission + pd_nof_caller_save_fpu_regs_frame_map = 32, // number of fpu registers killed by calls + pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator + pd_nof_fpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_nof_cpu_regs_frame_map + pd_nof_fpu_regs_frame_map - 1, + + pd_nof_xmm_regs_linearscan = 0, + pd_nof_caller_save_xmm_regs = 0, + 
pd_first_xmm_reg = -1, + pd_last_xmm_reg = -1 +}; + + +// for debug info: a float value in a register is saved in single precision by runtime stubs +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_SPARC_C1_DEFS_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.cpp b/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.cpp 2023-04-16 11:42:11.056551284 +0000 @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +// No FPU stack on SPARC diff -ur --new-file a/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.hpp b/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.hpp --- a/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_FpuStackSim_sparc.hpp 2023-04-16 11:42:11.056665493 +0000 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_C1_FPUSTACKSIM_SPARC_HPP +#define CPU_SPARC_C1_FPUSTACKSIM_SPARC_HPP + +// No FPU stack on SPARC +class FpuStackSim; + +#endif // CPU_SPARC_C1_FPUSTACKSIM_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c1_FrameMap_sparc.cpp b/src/hotspot/cpu/sparc/c1_FrameMap_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_FrameMap_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_FrameMap_sparc.cpp 2023-04-16 11:42:11.056887074 +0000 @@ -0,0 +1,441 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_sparc.inline.hpp" + + +const int FrameMap::pd_c_runtime_reserved_arg_size = 7; + + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool outgoing) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value + // so we must add it in here. 
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(SP_opr, st_off + STACK_BIAS, type)); + } else if (r_1->is_Register()) { + Register reg = r_1->as_Register(); + if (outgoing) { + assert(!reg->is_in(), "should be using I regs"); + } else { + assert(!reg->is_out(), "should be using O regs"); + } + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + opr = as_long_opr(reg); + } else if (is_reference_type(type)) { + opr = as_oop_opr(reg); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg); + } else if (type == T_ADDRESS) { + opr = as_address_opr(reg); + } else { + opr = as_opr(reg); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + FloatRegister f = r_1->as_FloatRegister(); + if (type == T_DOUBLE) { + opr = as_double_opr(f); + } else { + opr = as_float_opr(f); + } + } + return opr; +} + +// FrameMap +//-------------------------------------------------------- + +FloatRegister FrameMap::_fpu_regs [FrameMap::nof_fpu_regs]; + +// some useful constant RInfo's: +LIR_Opr FrameMap::in_long_opr; +LIR_Opr FrameMap::out_long_opr; +LIR_Opr FrameMap::g1_long_single_opr; + +LIR_Opr FrameMap::F0_opr; +LIR_Opr FrameMap::F0_double_opr; + +LIR_Opr FrameMap::G0_opr; +LIR_Opr FrameMap::G1_opr; +LIR_Opr FrameMap::G2_opr; +LIR_Opr FrameMap::G3_opr; +LIR_Opr FrameMap::G4_opr; +LIR_Opr FrameMap::G5_opr; +LIR_Opr FrameMap::G6_opr; +LIR_Opr FrameMap::G7_opr; +LIR_Opr FrameMap::O0_opr; +LIR_Opr FrameMap::O1_opr; +LIR_Opr FrameMap::O2_opr; +LIR_Opr FrameMap::O3_opr; +LIR_Opr FrameMap::O4_opr; +LIR_Opr FrameMap::O5_opr; +LIR_Opr FrameMap::O6_opr; +LIR_Opr FrameMap::O7_opr; +LIR_Opr FrameMap::L0_opr; +LIR_Opr FrameMap::L1_opr; +LIR_Opr FrameMap::L2_opr; +LIR_Opr FrameMap::L3_opr; +LIR_Opr FrameMap::L4_opr; +LIR_Opr FrameMap::L5_opr; +LIR_Opr FrameMap::L6_opr; +LIR_Opr FrameMap::L7_opr; +LIR_Opr FrameMap::I0_opr; 
+LIR_Opr FrameMap::I1_opr; +LIR_Opr FrameMap::I2_opr; +LIR_Opr FrameMap::I3_opr; +LIR_Opr FrameMap::I4_opr; +LIR_Opr FrameMap::I5_opr; +LIR_Opr FrameMap::I6_opr; +LIR_Opr FrameMap::I7_opr; + +LIR_Opr FrameMap::G0_oop_opr; +LIR_Opr FrameMap::G1_oop_opr; +LIR_Opr FrameMap::G2_oop_opr; +LIR_Opr FrameMap::G3_oop_opr; +LIR_Opr FrameMap::G4_oop_opr; +LIR_Opr FrameMap::G5_oop_opr; +LIR_Opr FrameMap::G6_oop_opr; +LIR_Opr FrameMap::G7_oop_opr; +LIR_Opr FrameMap::O0_oop_opr; +LIR_Opr FrameMap::O1_oop_opr; +LIR_Opr FrameMap::O2_oop_opr; +LIR_Opr FrameMap::O3_oop_opr; +LIR_Opr FrameMap::O4_oop_opr; +LIR_Opr FrameMap::O5_oop_opr; +LIR_Opr FrameMap::O6_oop_opr; +LIR_Opr FrameMap::O7_oop_opr; +LIR_Opr FrameMap::L0_oop_opr; +LIR_Opr FrameMap::L1_oop_opr; +LIR_Opr FrameMap::L2_oop_opr; +LIR_Opr FrameMap::L3_oop_opr; +LIR_Opr FrameMap::L4_oop_opr; +LIR_Opr FrameMap::L5_oop_opr; +LIR_Opr FrameMap::L6_oop_opr; +LIR_Opr FrameMap::L7_oop_opr; +LIR_Opr FrameMap::I0_oop_opr; +LIR_Opr FrameMap::I1_oop_opr; +LIR_Opr FrameMap::I2_oop_opr; +LIR_Opr FrameMap::I3_oop_opr; +LIR_Opr FrameMap::I4_oop_opr; +LIR_Opr FrameMap::I5_oop_opr; +LIR_Opr FrameMap::I6_oop_opr; +LIR_Opr FrameMap::I7_oop_opr; + +LIR_Opr FrameMap::G0_metadata_opr; +LIR_Opr FrameMap::G1_metadata_opr; +LIR_Opr FrameMap::G2_metadata_opr; +LIR_Opr FrameMap::G3_metadata_opr; +LIR_Opr FrameMap::G4_metadata_opr; +LIR_Opr FrameMap::G5_metadata_opr; +LIR_Opr FrameMap::G6_metadata_opr; +LIR_Opr FrameMap::G7_metadata_opr; +LIR_Opr FrameMap::O0_metadata_opr; +LIR_Opr FrameMap::O1_metadata_opr; +LIR_Opr FrameMap::O2_metadata_opr; +LIR_Opr FrameMap::O3_metadata_opr; +LIR_Opr FrameMap::O4_metadata_opr; +LIR_Opr FrameMap::O5_metadata_opr; +LIR_Opr FrameMap::O6_metadata_opr; +LIR_Opr FrameMap::O7_metadata_opr; +LIR_Opr FrameMap::L0_metadata_opr; +LIR_Opr FrameMap::L1_metadata_opr; +LIR_Opr FrameMap::L2_metadata_opr; +LIR_Opr FrameMap::L3_metadata_opr; +LIR_Opr FrameMap::L4_metadata_opr; +LIR_Opr FrameMap::L5_metadata_opr; +LIR_Opr 
FrameMap::L6_metadata_opr; +LIR_Opr FrameMap::L7_metadata_opr; +LIR_Opr FrameMap::I0_metadata_opr; +LIR_Opr FrameMap::I1_metadata_opr; +LIR_Opr FrameMap::I2_metadata_opr; +LIR_Opr FrameMap::I3_metadata_opr; +LIR_Opr FrameMap::I4_metadata_opr; +LIR_Opr FrameMap::I5_metadata_opr; +LIR_Opr FrameMap::I6_metadata_opr; +LIR_Opr FrameMap::I7_metadata_opr; + +LIR_Opr FrameMap::SP_opr; +LIR_Opr FrameMap::FP_opr; + +LIR_Opr FrameMap::Oexception_opr; +LIR_Opr FrameMap::Oissuing_pc_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + + +FloatRegister FrameMap::nr2floatreg (int rnr) { + assert(_init_done, "tables not initialized"); + debug_only(fpu_range_check(rnr);) + return _fpu_regs[rnr]; +} + + +// returns true if reg could be smashed by a callee. +bool FrameMap::is_caller_save_register (LIR_Opr reg) { + if (reg->is_single_fpu() || reg->is_double_fpu()) { return true; } + if (reg->is_double_cpu()) { + return is_caller_save_register(reg->as_register_lo()) || + is_caller_save_register(reg->as_register_hi()); + } + return is_caller_save_register(reg->as_register()); +} + + +NEEDS_CLEANUP // once the new calling convention is enabled, we no + // longer need to treat I5, I4 and L0 specially +// Because the interpreter destroys caller's I5, I4 and L0, +// we must spill them before doing a Java call as we may land in +// interpreter. 
+bool FrameMap::is_caller_save_register (Register r) { + return (r->is_global() && (r != G0)) || r->is_out(); +} + + +void FrameMap::initialize() { + assert(!_init_done, "once"); + + int i=0; + // Register usage: + // O6: sp + // I6: fp + // I7: return address + // G0: zero + // G2: thread + // G7: not available + // G6: not available + /* 0 */ map_register(i++, L0); + /* 1 */ map_register(i++, L1); + /* 2 */ map_register(i++, L2); + /* 3 */ map_register(i++, L3); + /* 4 */ map_register(i++, L4); + /* 5 */ map_register(i++, L5); + /* 6 */ map_register(i++, L6); + /* 7 */ map_register(i++, L7); + + /* 8 */ map_register(i++, I0); + /* 9 */ map_register(i++, I1); + /* 10 */ map_register(i++, I2); + /* 11 */ map_register(i++, I3); + /* 12 */ map_register(i++, I4); + /* 13 */ map_register(i++, I5); + /* 14 */ map_register(i++, O0); + /* 15 */ map_register(i++, O1); + /* 16 */ map_register(i++, O2); + /* 17 */ map_register(i++, O3); + /* 18 */ map_register(i++, O4); + /* 19 */ map_register(i++, O5); // <- last register visible in RegAlloc (RegAlloc::nof+cpu_regs) + /* 20 */ map_register(i++, G1); + /* 21 */ map_register(i++, G3); + /* 22 */ map_register(i++, G4); + /* 23 */ map_register(i++, G5); + /* 24 */ map_register(i++, G0); + + // the following registers are not normally available + /* 25 */ map_register(i++, O7); + /* 26 */ map_register(i++, G2); + /* 27 */ map_register(i++, O6); + /* 28 */ map_register(i++, I6); + /* 29 */ map_register(i++, I7); + /* 30 */ map_register(i++, G6); + /* 31 */ map_register(i++, G7); + assert(i == nof_cpu_regs, "number of CPU registers"); + + for (i = 0; i < nof_fpu_regs; i++) { + _fpu_regs[i] = as_FloatRegister(i); + } + + _init_done = true; + + in_long_opr = as_long_opr(I0); + out_long_opr = as_long_opr(O0); + g1_long_single_opr = as_long_single_opr(G1); + + G0_opr = as_opr(G0); + G1_opr = as_opr(G1); + G2_opr = as_opr(G2); + G3_opr = as_opr(G3); + G4_opr = as_opr(G4); + G5_opr = as_opr(G5); + G6_opr = as_opr(G6); + G7_opr = 
as_opr(G7); + O0_opr = as_opr(O0); + O1_opr = as_opr(O1); + O2_opr = as_opr(O2); + O3_opr = as_opr(O3); + O4_opr = as_opr(O4); + O5_opr = as_opr(O5); + O6_opr = as_opr(O6); + O7_opr = as_opr(O7); + L0_opr = as_opr(L0); + L1_opr = as_opr(L1); + L2_opr = as_opr(L2); + L3_opr = as_opr(L3); + L4_opr = as_opr(L4); + L5_opr = as_opr(L5); + L6_opr = as_opr(L6); + L7_opr = as_opr(L7); + I0_opr = as_opr(I0); + I1_opr = as_opr(I1); + I2_opr = as_opr(I2); + I3_opr = as_opr(I3); + I4_opr = as_opr(I4); + I5_opr = as_opr(I5); + I6_opr = as_opr(I6); + I7_opr = as_opr(I7); + + G0_oop_opr = as_oop_opr(G0); + G1_oop_opr = as_oop_opr(G1); + G2_oop_opr = as_oop_opr(G2); + G3_oop_opr = as_oop_opr(G3); + G4_oop_opr = as_oop_opr(G4); + G5_oop_opr = as_oop_opr(G5); + G6_oop_opr = as_oop_opr(G6); + G7_oop_opr = as_oop_opr(G7); + O0_oop_opr = as_oop_opr(O0); + O1_oop_opr = as_oop_opr(O1); + O2_oop_opr = as_oop_opr(O2); + O3_oop_opr = as_oop_opr(O3); + O4_oop_opr = as_oop_opr(O4); + O5_oop_opr = as_oop_opr(O5); + O6_oop_opr = as_oop_opr(O6); + O7_oop_opr = as_oop_opr(O7); + L0_oop_opr = as_oop_opr(L0); + L1_oop_opr = as_oop_opr(L1); + L2_oop_opr = as_oop_opr(L2); + L3_oop_opr = as_oop_opr(L3); + L4_oop_opr = as_oop_opr(L4); + L5_oop_opr = as_oop_opr(L5); + L6_oop_opr = as_oop_opr(L6); + L7_oop_opr = as_oop_opr(L7); + I0_oop_opr = as_oop_opr(I0); + I1_oop_opr = as_oop_opr(I1); + I2_oop_opr = as_oop_opr(I2); + I3_oop_opr = as_oop_opr(I3); + I4_oop_opr = as_oop_opr(I4); + I5_oop_opr = as_oop_opr(I5); + I6_oop_opr = as_oop_opr(I6); + I7_oop_opr = as_oop_opr(I7); + + G0_metadata_opr = as_metadata_opr(G0); + G1_metadata_opr = as_metadata_opr(G1); + G2_metadata_opr = as_metadata_opr(G2); + G3_metadata_opr = as_metadata_opr(G3); + G4_metadata_opr = as_metadata_opr(G4); + G5_metadata_opr = as_metadata_opr(G5); + G6_metadata_opr = as_metadata_opr(G6); + G7_metadata_opr = as_metadata_opr(G7); + O0_metadata_opr = as_metadata_opr(O0); + O1_metadata_opr = as_metadata_opr(O1); + O2_metadata_opr = 
as_metadata_opr(O2); + O3_metadata_opr = as_metadata_opr(O3); + O4_metadata_opr = as_metadata_opr(O4); + O5_metadata_opr = as_metadata_opr(O5); + O6_metadata_opr = as_metadata_opr(O6); + O7_metadata_opr = as_metadata_opr(O7); + L0_metadata_opr = as_metadata_opr(L0); + L1_metadata_opr = as_metadata_opr(L1); + L2_metadata_opr = as_metadata_opr(L2); + L3_metadata_opr = as_metadata_opr(L3); + L4_metadata_opr = as_metadata_opr(L4); + L5_metadata_opr = as_metadata_opr(L5); + L6_metadata_opr = as_metadata_opr(L6); + L7_metadata_opr = as_metadata_opr(L7); + I0_metadata_opr = as_metadata_opr(I0); + I1_metadata_opr = as_metadata_opr(I1); + I2_metadata_opr = as_metadata_opr(I2); + I3_metadata_opr = as_metadata_opr(I3); + I4_metadata_opr = as_metadata_opr(I4); + I5_metadata_opr = as_metadata_opr(I5); + I6_metadata_opr = as_metadata_opr(I6); + I7_metadata_opr = as_metadata_opr(I7); + + FP_opr = as_pointer_opr(FP); + SP_opr = as_pointer_opr(SP); + + F0_opr = as_float_opr(F0); + F0_double_opr = as_double_opr(F0); + + Oexception_opr = as_oop_opr(Oexception); + Oissuing_pc_opr = as_opr(Oissuing_pc); + + _caller_save_cpu_regs[0] = FrameMap::O0_opr; + _caller_save_cpu_regs[1] = FrameMap::O1_opr; + _caller_save_cpu_regs[2] = FrameMap::O2_opr; + _caller_save_cpu_regs[3] = FrameMap::O3_opr; + _caller_save_cpu_regs[4] = FrameMap::O4_opr; + _caller_save_cpu_regs[5] = FrameMap::O5_opr; + _caller_save_cpu_regs[6] = FrameMap::G1_opr; + _caller_save_cpu_regs[7] = FrameMap::G3_opr; + _caller_save_cpu_regs[8] = FrameMap::G4_opr; + _caller_save_cpu_regs[9] = FrameMap::G5_opr; + for (int i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + return Address(SP, STACK_BIAS + in_bytes(sp_offset)); +} + + +VMReg FrameMap::fpu_regname (int n) { + return as_FloatRegister(n)->as_VMReg(); +} + + +LIR_Opr FrameMap::stack_pointer() { + return SP_opr; +} + + +// JSR 292 +LIR_Opr 
FrameMap::method_handle_invoke_SP_save_opr() { + assert(L7 == L7_mh_SP_save, "must be same register"); + return L7_opr; +} + + +bool FrameMap::validate_frame() { + int max_offset = in_bytes(framesize_in_bytes()); + int java_index = 0; + for (int i = 0; i < _incoming_arguments->length(); i++) { + LIR_Opr opr = _incoming_arguments->at(i); + if (opr->is_stack()) { + max_offset = MAX2(_argument_locations->at(java_index), max_offset); + } + java_index += type2size[opr->type()]; + } + return Assembler::is_simm13(max_offset + STACK_BIAS); +} diff -ur --new-file a/src/hotspot/cpu/sparc/c1_FrameMap_sparc.hpp b/src/hotspot/cpu/sparc/c1_FrameMap_sparc.hpp --- a/src/hotspot/cpu/sparc/c1_FrameMap_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_FrameMap_sparc.hpp 2023-04-16 11:42:11.057037943 +0000 @@ -0,0 +1,185 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SPARC_C1_FRAMEMAP_SPARC_HPP +#define CPU_SPARC_C1_FRAMEMAP_SPARC_HPP + + public: + + enum { + nof_reg_args = 6, // registers o0-o5 are available for parameter passing + first_available_sp_in_frame = frame::memory_parameter_word_sp_offset * BytesPerWord, + frame_pad_in_bytes = 0 + }; + + static const int pd_c_runtime_reserved_arg_size; + + static LIR_Opr G0_opr; + static LIR_Opr G1_opr; + static LIR_Opr G2_opr; + static LIR_Opr G3_opr; + static LIR_Opr G4_opr; + static LIR_Opr G5_opr; + static LIR_Opr G6_opr; + static LIR_Opr G7_opr; + static LIR_Opr O0_opr; + static LIR_Opr O1_opr; + static LIR_Opr O2_opr; + static LIR_Opr O3_opr; + static LIR_Opr O4_opr; + static LIR_Opr O5_opr; + static LIR_Opr O6_opr; + static LIR_Opr O7_opr; + static LIR_Opr L0_opr; + static LIR_Opr L1_opr; + static LIR_Opr L2_opr; + static LIR_Opr L3_opr; + static LIR_Opr L4_opr; + static LIR_Opr L5_opr; + static LIR_Opr L6_opr; + static LIR_Opr L7_opr; + static LIR_Opr I0_opr; + static LIR_Opr I1_opr; + static LIR_Opr I2_opr; + static LIR_Opr I3_opr; + static LIR_Opr I4_opr; + static LIR_Opr I5_opr; + static LIR_Opr I6_opr; + static LIR_Opr I7_opr; + + static LIR_Opr SP_opr; + static LIR_Opr FP_opr; + + static LIR_Opr G0_oop_opr; + static LIR_Opr G1_oop_opr; + static LIR_Opr G2_oop_opr; + static LIR_Opr G3_oop_opr; + static LIR_Opr G4_oop_opr; + static LIR_Opr G5_oop_opr; + static LIR_Opr G6_oop_opr; + static LIR_Opr G7_oop_opr; + static LIR_Opr O0_oop_opr; + static LIR_Opr O1_oop_opr; + static LIR_Opr O2_oop_opr; + static LIR_Opr O3_oop_opr; + static LIR_Opr O4_oop_opr; + static LIR_Opr O5_oop_opr; + static LIR_Opr O6_oop_opr; + static LIR_Opr O7_oop_opr; + static LIR_Opr L0_oop_opr; + static LIR_Opr L1_oop_opr; + static LIR_Opr L2_oop_opr; + static LIR_Opr L3_oop_opr; + static LIR_Opr L4_oop_opr; + static LIR_Opr L5_oop_opr; + static LIR_Opr L6_oop_opr; + static LIR_Opr L7_oop_opr; + static LIR_Opr I0_oop_opr; + static LIR_Opr I1_oop_opr; + static LIR_Opr I2_oop_opr; 
+ static LIR_Opr I3_oop_opr; + static LIR_Opr I4_oop_opr; + static LIR_Opr I5_oop_opr; + static LIR_Opr I6_oop_opr; + static LIR_Opr I7_oop_opr; + + static LIR_Opr G0_metadata_opr; + static LIR_Opr G1_metadata_opr; + static LIR_Opr G2_metadata_opr; + static LIR_Opr G3_metadata_opr; + static LIR_Opr G4_metadata_opr; + static LIR_Opr G5_metadata_opr; + static LIR_Opr G6_metadata_opr; + static LIR_Opr G7_metadata_opr; + static LIR_Opr O0_metadata_opr; + static LIR_Opr O1_metadata_opr; + static LIR_Opr O2_metadata_opr; + static LIR_Opr O3_metadata_opr; + static LIR_Opr O4_metadata_opr; + static LIR_Opr O5_metadata_opr; + static LIR_Opr O6_metadata_opr; + static LIR_Opr O7_metadata_opr; + static LIR_Opr L0_metadata_opr; + static LIR_Opr L1_metadata_opr; + static LIR_Opr L2_metadata_opr; + static LIR_Opr L3_metadata_opr; + static LIR_Opr L4_metadata_opr; + static LIR_Opr L5_metadata_opr; + static LIR_Opr L6_metadata_opr; + static LIR_Opr L7_metadata_opr; + static LIR_Opr I0_metadata_opr; + static LIR_Opr I1_metadata_opr; + static LIR_Opr I2_metadata_opr; + static LIR_Opr I3_metadata_opr; + static LIR_Opr I4_metadata_opr; + static LIR_Opr I5_metadata_opr; + static LIR_Opr I6_metadata_opr; + static LIR_Opr I7_metadata_opr; + + static LIR_Opr in_long_opr; + static LIR_Opr out_long_opr; + static LIR_Opr g1_long_single_opr; + + static LIR_Opr F0_opr; + static LIR_Opr F0_double_opr; + + static LIR_Opr Oexception_opr; + static LIR_Opr Oissuing_pc_opr; + + private: + static FloatRegister _fpu_regs [nof_fpu_regs]; + + static LIR_Opr as_long_single_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + static LIR_Opr as_long_pair_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r->successor()), cpu_reg2rnr(r)); + } + + public: + + static LIR_Opr as_long_opr(Register r) { + return as_long_single_opr(r); + } + static LIR_Opr as_pointer_opr(Register r) { + return as_long_single_opr(r); + } + static LIR_Opr as_float_opr(FloatRegister r) 
{ + return LIR_OprFact::single_fpu(r->encoding()); + } + static LIR_Opr as_double_opr(FloatRegister r) { + return LIR_OprFact::double_fpu(r->successor()->encoding(), r->encoding()); + } + + static FloatRegister nr2floatreg (int rnr); + + static VMReg fpu_regname (int n); + + static bool is_caller_save_register (LIR_Opr reg); + static bool is_caller_save_register (Register r); + + static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } + static int last_cpu_reg() { return pd_last_cpu_reg; } + +#endif // CPU_SPARC_C1_FRAMEMAP_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp 2023-04-16 11:42:11.058218611 +0000 @@ -0,0 +1,3347 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "memory/universe.hpp" +#include "nativeInst_sparc.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/safepointMechanism.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/powerOfTwo.hpp" + +#define __ _masm-> + + +//------------------------------------------------------------ + + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { + if (opr->is_constant()) { + LIR_Const* constant = opr->as_constant_ptr(); + switch (constant->type()) { + case T_INT: { + jint value = constant->as_jint(); + return Assembler::is_simm13(value); + } + + default: + return false; + } + } + return false; +} + + +bool LIR_Assembler::is_single_instruction(LIR_Op* op) { + switch (op->code()) { + case lir_null_check: + return true; + + + case lir_add: + case lir_ushr: + case lir_shr: + case lir_shl: + // integer shifts and adds are always one instruction + return op->result_opr()->is_single_cpu(); + + + case lir_move: { + LIR_Op1* op1 = op->as_Op1(); + LIR_Opr src = op1->in_opr(); + LIR_Opr dst = op1->result_opr(); + + if (src == dst) { + NEEDS_CLEANUP; + // this works around a problem where moves with the same src and dst + // end up in the delay slot and then the assembler swallows the mov + // since it has no effect and then it complains because the delay slot + // is empty. 
returning false stops the optimizer from putting this in + // the delay slot + return false; + } + + // don't put moves involving oops into the delay slot since the VerifyOops code + // will make it much larger than a single instruction. + if (VerifyOops) { + return false; + } + + if (src->is_double_cpu() || dst->is_double_cpu() || op1->patch_code() != lir_patch_none || + ((src->is_double_fpu() || dst->is_double_fpu()) && op1->move_kind() != lir_move_normal)) { + return false; + } + + if (UseCompressedOops) { + if (dst->is_address() && !dst->is_stack() && is_reference_type(dst->type())) return false; + if (src->is_address() && !src->is_stack() && is_reference_type(src->type())) return false; + } + + if (UseCompressedClassPointers) { + if (src->is_address() && !src->is_stack() && src->type() == T_ADDRESS && + src->as_address_ptr()->disp() == oopDesc::klass_offset_in_bytes()) return false; + } + + if (dst->is_register()) { + if (src->is_address() && Assembler::is_simm13(src->as_address_ptr()->disp())) { + return !PatchALot; + } else if (src->is_single_stack()) { + return true; + } + } + + if (src->is_register()) { + if (dst->is_address() && Assembler::is_simm13(dst->as_address_ptr()->disp())) { + return !PatchALot; + } else if (dst->is_single_stack()) { + return true; + } + } + + if (dst->is_register() && + ((src->is_register() && src->is_single_word() && src->is_same_type(dst)) || + (src->is_constant() && LIR_Assembler::is_small_constant(op->as_Op1()->in_opr())))) { + return true; + } + + return false; + } + + default: + return false; + } + ShouldNotReachHere(); +} + + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::O0_oop_opr; +} + + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::I0_opr; +} + + +int LIR_Assembler::initial_frame_size_in_bytes() const { + return in_bytes(frame_map()->framesize_in_bytes()); +} + + +// inline cache check: the inline cached class is in G5_inline_cache_reg(G5); +// we fetch the class of the receiver (O0) and 
compare it with the cached class. +// If they do not match we jump to slow case. +int LIR_Assembler::check_icache() { + int offset = __ offset(); + __ inline_cache_check(O0, G5_inline_cache_reg); + return offset; +} + +void LIR_Assembler::clinit_barrier(ciMethod* method) { + ShouldNotReachHere(); // not implemented +} + +void LIR_Assembler::osr_entry() { + // On-stack-replacement entry sequence (interpreter frame layout described in interpreter_sparc.cpp): + // + // 1. Create a new compiled activation. + // 2. Initialize local variables in the compiled activation. The expression stack must be empty + // at the osr_bci; it is not initialized. + // 3. Jump to the continuation address in compiled code to resume execution. + + // OSR entry point + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->end()->state(); + int number_of_locks = entry_state->locks_size(); + + // Create a frame for the compiled activation. + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[number_of_locks-1..0] + // + // locals is a direct copy of the interpreter frame, so in the osr buffer + // the first slot in the local array is the last local from the interpreter + // and the last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is the 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // I0: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. 
+ + Register OSR_buf = osrBufferPointer()->as_register(); + { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ld_ptr(OSR_buf, slot_offset + 1*BytesPerWord, O7); + __ cmp_and_br_short(O7, G0, Assembler::notEqual, Assembler::pt, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif // ASSERT + // Copy the lock field into the compiled activation. + __ ld_ptr(OSR_buf, slot_offset + 0, O7); + __ st_ptr(O7, frame_map()->address_for_monitor_lock(i)); + __ ld_ptr(OSR_buf, slot_offset + 1*BytesPerWord, O7); + __ st_ptr(O7, frame_map()->address_for_monitor_object(i)); + } + } +} + + +// -------------------------------------------------------------------------------------------- + +void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register hdr, int monitor_no) { + if (!GenerateSynchronizationCode) return; + + Register obj_reg = obj_opr->as_register(); + Register lock_reg = lock_opr->as_register(); + + Address mon_addr = frame_map()->address_for_monitor_lock(monitor_no); + Register reg = mon_addr.base(); + int offset = mon_addr.disp(); + // compute pointer to BasicLock + if (mon_addr.is_simm13()) { + __ add(reg, offset, lock_reg); + } + else { + __ set(offset, lock_reg); + __ add(reg, lock_reg, lock_reg); + } + // unlock object + MonitorAccessStub* slow_case = new MonitorExitStub(lock_opr, UseFastLocking, monitor_no); + // _slow_case_stubs->append(slow_case); + // temporary fix: must be created after exceptionhandler, therefore as call stub + 
_slow_case_stubs->append(slow_case); + if (UseFastLocking) { + // try inlined fast unlocking first, revert to slow locking if it fails + // note: lock_reg points to the displaced header since the displaced header offset is 0! + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj_reg, lock_reg, *slow_case->entry()); + } else { + // always do slow unlocking + // note: the slow unlocking code could be inlined here, however if we use + // slow unlocking, speed doesn't matter anyway and this solution is + // simpler and requires less duplicated code - additionally, the + // slow unlocking code is the same in either case which simplifies + // debugging + __ br(Assembler::always, false, Assembler::pt, *slow_case->entry()); + __ delayed()->nop(); + } + // done + __ bind(*slow_case->continuation()); +} + + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + ciMethod* method = compilation()->method(); + + address handler_base = __ start_a_stub(exception_handler_size()); + + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + + +// Emit the code to remove the frame from the stack in the exception +// unwind path. 
+int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), O0); + __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset())); + __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(O0); + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ mov(O0, I0); // Preserve the exception + } + + // Perform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::I1_opr); + stub = new MonitorExitStub(FrameMap::I1_opr, true, 0); + __ unlock_object(I3, I2, I1, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ mov(G2_thread, O0); + __ save_thread(I1); // need to preserve thread in G2 across + // runtime call + metadata2reg(method()->constant_encoding(), O1); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ restore_thread(I1); + } + + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ mov(I0, O0); // Restore the exception + } + + // dispatch to the unwind logic + __ call(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); + __ delayed()->nop(); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + + +int LIR_Assembler::emit_deopt_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when 
searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for deopt handler + ciMethod* method = compilation()->method(); + address handler_base = __ start_a_stub(deopt_handler_size()); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + AddressLiteral deopt_blob(SharedRuntime::deopt_blob()->unpack()); + __ JUMP(deopt_blob, G3_scratch, 0); // sethi;jmp + __ delayed()->nop(); + guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ set(NULL_WORD, reg); + } else { +#ifdef ASSERT + { + ThreadInVMfromNative tiv(JavaThread::current()); + assert(Universe::heap()->is_in(JNIHandles::resolve(o)), "should be real oop"); + } +#endif + int oop_index = __ oop_recorder()->find_index(o); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ set(NULL_WORD, reg, rspec); // Will be set when the nmethod is created + } +} + + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + // Allocate a new index in table to hold the object once it's been patched + int oop_index = __ oop_recorder()->allocate_oop_index(NULL); + PatchingStub* patch = new PatchingStub(_masm, patching_id(info), oop_index); + + AddressLiteral addrlit(NULL, oop_Relocation::spec(oop_index)); + assert(addrlit.rspec().type() == relocInfo::oop_type, "must be an oop reloc"); + // It may not seem necessary to use a sethi/add pair to load a NULL into dest, but the + // NULL will be dynamically patched later and the patched value may be large. 
We must + // therefore generate the sethi/add as a placeholder + __ patchable_set(addrlit, reg); + + patching_epilog(patch, lir_patch_normal, reg, info); +} + + +void LIR_Assembler::metadata2reg(Metadata* o, Register reg) { + __ set_metadata_constant(o, reg); +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) { + // Allocate a new index in table to hold the klass once it's been patched + int index = __ oop_recorder()->allocate_metadata_index(NULL); + PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index); + AddressLiteral addrlit(NULL, metadata_Relocation::spec(index)); + assert(addrlit.rspec().type() == relocInfo::metadata_type, "must be an metadata reloc"); + // It may not seem necessary to use a sethi/add pair to load a NULL into dest, but the + // NULL will be dynamically patched later and the patched value may be large. We must + // therefore generate the sethi/add as a placeholder + __ patchable_set(addrlit, reg); + + patching_epilog(patch, lir_patch_normal, reg, info); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { + case lir_idiv: + case lir_irem: // Both idiv & irem are handled after the switch (below). 
+ break; + case lir_fmaf: + __ fmadd(FloatRegisterImpl::S, + op->in_opr1()->as_float_reg(), + op->in_opr2()->as_float_reg(), + op->in_opr3()->as_float_reg(), + op->result_opr()->as_float_reg()); + return; + case lir_fmad: + __ fmadd(FloatRegisterImpl::D, + op->in_opr1()->as_double_reg(), + op->in_opr2()->as_double_reg(), + op->in_opr3()->as_double_reg(), + op->result_opr()->as_double_reg()); + return; + default: + ShouldNotReachHere(); + break; + } + + // Handle idiv & irem: + + Register Rdividend = op->in_opr1()->as_register(); + Register Rdivisor = noreg; + Register Rscratch = op->in_opr3()->as_register(); + Register Rresult = op->result_opr()->as_register(); + int divisor = -1; + + if (op->in_opr2()->is_register()) { + Rdivisor = op->in_opr2()->as_register(); + } else { + divisor = op->in_opr2()->as_constant_ptr()->as_jint(); + assert(Assembler::is_simm13(divisor), "can only handle simm13"); + } + + assert(Rdividend != Rscratch, ""); + assert(Rdivisor != Rscratch, ""); + assert(op->code() == lir_idiv || op->code() == lir_irem, "Must be irem or idiv"); + + if (Rdivisor == noreg && is_power_of_2(divisor)) { + // convert division by a power of two into some shifts and logical operations + if (op->code() == lir_idiv) { + if (divisor == 2) { + __ srl(Rdividend, 31, Rscratch); + } else { + __ sra(Rdividend, 31, Rscratch); + __ and3(Rscratch, divisor - 1, Rscratch); + } + __ add(Rdividend, Rscratch, Rscratch); + __ sra(Rscratch, log2i_exact(divisor), Rresult); + return; + } else { + if (divisor == 2) { + __ srl(Rdividend, 31, Rscratch); + } else { + __ sra(Rdividend, 31, Rscratch); + __ and3(Rscratch, divisor - 1,Rscratch); + } + __ add(Rdividend, Rscratch, Rscratch); + __ andn(Rscratch, divisor - 1,Rscratch); + __ sub(Rdividend, Rscratch, Rresult); + return; + } + } + + __ sra(Rdividend, 31, Rscratch); + __ wry(Rscratch); + + add_debug_info_for_div0_here(op->info()); + + if (Rdivisor != noreg) { + __ sdivcc(Rdividend, Rdivisor, (op->code() == lir_idiv ? 
Rresult : Rscratch)); + } else { + assert(Assembler::is_simm13(divisor), "can only handle simm13"); + __ sdivcc(Rdividend, divisor, (op->code() == lir_idiv ? Rresult : Rscratch)); + } + + Label skip; + __ br(Assembler::overflowSet, true, Assembler::pn, skip); + __ delayed()->Assembler::sethi(0x80000000, (op->code() == lir_idiv ? Rresult : Rscratch)); + __ bind(skip); + + if (op->code() == lir_irem) { + if (Rdivisor != noreg) { + __ smul(Rscratch, Rdivisor, Rscratch); + } else { + __ smul(Rscratch, divisor, Rscratch); + } + __ sub(Rdividend, Rscratch, Rresult); + } +} + + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); +#endif + assert(op->info() == NULL, "shouldn't have CodeEmitInfo"); + + if (op->cond() == lir_cond_always) { + __ br(Assembler::always, false, Assembler::pt, *(op->label())); + } else if (op->code() == lir_cond_float_branch) { + assert(op->ublock() != NULL, "must have unordered successor"); + bool is_unordered = (op->ublock() == op->block()); + Assembler::Condition acond; + switch (op->cond()) { + case lir_cond_equal: acond = Assembler::f_equal; break; + case lir_cond_notEqual: acond = Assembler::f_notEqual; break; + case lir_cond_less: acond = (is_unordered ? Assembler::f_unorderedOrLess : Assembler::f_less); break; + case lir_cond_greater: acond = (is_unordered ? Assembler::f_unorderedOrGreater : Assembler::f_greater); break; + case lir_cond_lessEqual: acond = (is_unordered ? Assembler::f_unorderedOrLessOrEqual : Assembler::f_lessOrEqual); break; + case lir_cond_greaterEqual: acond = (is_unordered ? 
Assembler::f_unorderedOrGreaterOrEqual: Assembler::f_greaterOrEqual); break; + default : ShouldNotReachHere(); + } + __ fb( acond, false, Assembler::pn, *(op->label())); + } else { + assert (op->code() == lir_branch, "just checking"); + + Assembler::Condition acond; + switch (op->cond()) { + case lir_cond_equal: acond = Assembler::equal; break; + case lir_cond_notEqual: acond = Assembler::notEqual; break; + case lir_cond_less: acond = Assembler::less; break; + case lir_cond_lessEqual: acond = Assembler::lessEqual; break; + case lir_cond_greaterEqual: acond = Assembler::greaterEqual; break; + case lir_cond_greater: acond = Assembler::greater; break; + case lir_cond_aboveEqual: acond = Assembler::greaterEqualUnsigned; break; + case lir_cond_belowEqual: acond = Assembler::lessEqualUnsigned; break; + default: ShouldNotReachHere(); + }; + + // sparc has different condition codes for testing 32-bit + // vs. 64-bit values. We could always test xcc if we could + // guarantee that 32-bit loads always sign extended but that isn't + // true and since sign extension isn't free, it would impose a + // slight cost. + if (op->type() == T_INT) { + __ br(acond, false, Assembler::pn, *(op->label())); + } else + __ brx(acond, false, Assembler::pn, *(op->label())); + } + // The peephole pass fills the delay slot +} + + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); + LIR_Opr dst = op->result_opr(); + + switch(code) { + case Bytecodes::_i2l: { + Register rlo = dst->as_register_lo(); + Register rhi = dst->as_register_hi(); + Register rval = op->in_opr()->as_register(); + __ sra(rval, 0, rlo); + break; + } + case Bytecodes::_i2d: + case Bytecodes::_i2f: { + bool is_double = (code == Bytecodes::_i2d); + FloatRegister rdst = is_double ? dst->as_double_reg() : dst->as_float_reg(); + FloatRegisterImpl::Width w = is_double ? 
FloatRegisterImpl::D : FloatRegisterImpl::S; + FloatRegister rsrc = op->in_opr()->as_float_reg(); + if (rsrc != rdst) { + __ fmov(FloatRegisterImpl::S, rsrc, rdst); + } + __ fitof(w, rdst, rdst); + break; + } + case Bytecodes::_f2i:{ + FloatRegister rsrc = op->in_opr()->as_float_reg(); + Address addr = frame_map()->address_for_slot(dst->single_stack_ix()); + Label L; + // result must be 0 if value is NaN; test by comparing value to itself + __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, rsrc, rsrc); + __ fb(Assembler::f_unordered, true, Assembler::pn, L); + __ delayed()->st(G0, addr); // annulled if contents of rsrc is not NaN + __ ftoi(FloatRegisterImpl::S, rsrc, rsrc); + // move integer result from float register to int register + __ stf(FloatRegisterImpl::S, rsrc, addr.base(), addr.disp()); + __ bind (L); + break; + } + case Bytecodes::_l2i: { + Register rlo = op->in_opr()->as_register_lo(); + Register rhi = op->in_opr()->as_register_hi(); + Register rdst = dst->as_register(); + __ sra(rlo, 0, rdst); + break; + } + case Bytecodes::_d2f: + case Bytecodes::_f2d: { + bool is_double = (code == Bytecodes::_f2d); + assert((!is_double && dst->is_single_fpu()) || (is_double && dst->is_double_fpu()), "check"); + LIR_Opr val = op->in_opr(); + FloatRegister rval = (code == Bytecodes::_d2f) ? val->as_double_reg() : val->as_float_reg(); + FloatRegister rdst = is_double ? dst->as_double_reg() : dst->as_float_reg(); + FloatRegisterImpl::Width vw = is_double ? FloatRegisterImpl::S : FloatRegisterImpl::D; + FloatRegisterImpl::Width dw = is_double ? FloatRegisterImpl::D : FloatRegisterImpl::S; + __ ftof(vw, dw, rval, rdst); + break; + } + case Bytecodes::_i2s: + case Bytecodes::_i2b: { + Register rval = op->in_opr()->as_register(); + Register rdst = dst->as_register(); + int shift = (code == Bytecodes::_i2b) ? 
(BitsPerInt - T_BYTE_aelem_bytes * BitsPerByte) : (BitsPerInt - BitsPerShort); + __ sll (rval, shift, rdst); + __ sra (rdst, shift, rdst); + break; + } + case Bytecodes::_i2c: { + Register rval = op->in_opr()->as_register(); + Register rdst = dst->as_register(); + int shift = BitsPerInt - T_CHAR_aelem_bytes * BitsPerByte; + __ sll (rval, shift, rdst); + __ srl (rdst, shift, rdst); + break; + } + + default: ShouldNotReachHere(); + } +} + + +void LIR_Assembler::align_call(LIR_Code) { + // do nothing since all instructions are word aligned on sparc +} + + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + __ call(op->addr(), rtype); + // The peephole pass fills the delay slot, add_call_info is done in + // LIR_Assembler::emit_delay. +} + + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + __ ic_call(op->addr(), false); + // The peephole pass fills the delay slot, add_call_info is done in + // LIR_Assembler::emit_delay. +} + + +int LIR_Assembler::store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned) { + int store_offset; + if (!Assembler::is_simm13(offset + ((type == T_LONG) ? 
wordSize : 0))) { + assert(base != O7, "destroying register"); + assert(!unaligned, "can't handle this"); + // for offsets that do not fit in a simm13 (for T_LONG, offset + wordSize must fit as well) we setup the offset in O7 + __ set(offset, O7); + store_offset = store(from_reg, base, O7, type, wide); + } else { + if (is_reference_type(type)) { + __ verify_oop(from_reg->as_register()); + } + store_offset = code_offset(); + switch (type) { + case T_BOOLEAN: // fall through + case T_BYTE : __ stb(from_reg->as_register(), base, offset); break; + case T_CHAR : __ sth(from_reg->as_register(), base, offset); break; + case T_SHORT : __ sth(from_reg->as_register(), base, offset); break; + case T_INT : __ stw(from_reg->as_register(), base, offset); break; + case T_LONG : + if (unaligned || PatchALot) { + // Don't use O7 here because it may be equal to 'base' (see LIR_Assembler::reg2mem) + assert(G3_scratch != base, "can't handle this"); + assert(G3_scratch != from_reg->as_register_lo(), "can't handle this"); + __ srax(from_reg->as_register_lo(), 32, G3_scratch); + __ stw(from_reg->as_register_lo(), base, offset + lo_word_offset_in_bytes); + __ stw(G3_scratch, base, offset + hi_word_offset_in_bytes); + } else { + __ stx(from_reg->as_register_lo(), base, offset); + } + break; + case T_ADDRESS: + case T_METADATA: + __ st_ptr(from_reg->as_register(), base, offset); + break; + case T_ARRAY : // fall through + case T_OBJECT: + { + if (UseCompressedOops && !wide) { + __ encode_heap_oop(from_reg->as_register(), G3_scratch); + store_offset = code_offset(); + __ stw(G3_scratch, base, offset); + } else { + __ st_ptr(from_reg->as_register(), base, offset); + } + break; + } + + case T_FLOAT : __ stf(FloatRegisterImpl::S, from_reg->as_float_reg(), base, offset); break; + case T_DOUBLE: + { + FloatRegister reg = from_reg->as_double_reg(); + // split unaligned stores + if (unaligned || PatchALot) { + assert(Assembler::is_simm13(offset + 4), "must be"); + __ stf(FloatRegisterImpl::S, reg->successor(), base, offset + 4); + __ 
stf(FloatRegisterImpl::S, reg, base, offset); + } else { + __ stf(FloatRegisterImpl::D, reg, base, offset); + } + break; + } + default : ShouldNotReachHere(); + } + } + return store_offset; +} + + +int LIR_Assembler::store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide) { + if (is_reference_type(type)) { + __ verify_oop(from_reg->as_register()); + } + int store_offset = code_offset(); + switch (type) { + case T_BOOLEAN: // fall through + case T_BYTE : __ stb(from_reg->as_register(), base, disp); break; + case T_CHAR : __ sth(from_reg->as_register(), base, disp); break; + case T_SHORT : __ sth(from_reg->as_register(), base, disp); break; + case T_INT : __ stw(from_reg->as_register(), base, disp); break; + case T_LONG : + __ stx(from_reg->as_register_lo(), base, disp); + break; + case T_ADDRESS: + __ st_ptr(from_reg->as_register(), base, disp); + break; + case T_ARRAY : // fall through + case T_OBJECT: + { + if (UseCompressedOops && !wide) { + __ encode_heap_oop(from_reg->as_register(), G3_scratch); + store_offset = code_offset(); + __ stw(G3_scratch, base, disp); + } else { + __ st_ptr(from_reg->as_register(), base, disp); + } + break; + } + case T_FLOAT : __ stf(FloatRegisterImpl::S, from_reg->as_float_reg(), base, disp); break; + case T_DOUBLE: __ stf(FloatRegisterImpl::D, from_reg->as_double_reg(), base, disp); break; + default : ShouldNotReachHere(); + } + return store_offset; +} + + +int LIR_Assembler::load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned) { + int load_offset; + if (!Assembler::is_simm13(offset + ((type == T_LONG) ? 
wordSize : 0))) { + assert(base != O7, "destroying register"); + assert(!unaligned, "can't handle this"); + // for offsets that do not fit in a simm13 (for T_LONG, offset + wordSize must fit as well) we setup the offset in O7 + __ set(offset, O7); + load_offset = load(base, O7, to_reg, type, wide); + } else { + load_offset = code_offset(); + switch(type) { + case T_BOOLEAN: // fall through + case T_BYTE : __ ldsb(base, offset, to_reg->as_register()); break; + case T_CHAR : __ lduh(base, offset, to_reg->as_register()); break; + case T_SHORT : __ ldsh(base, offset, to_reg->as_register()); break; + case T_INT : __ ld(base, offset, to_reg->as_register()); break; + case T_LONG : + if (!unaligned && !PatchALot) { + __ ldx(base, offset, to_reg->as_register_lo()); + } else { + assert(base != to_reg->as_register_lo(), "can't handle this"); + assert(O7 != to_reg->as_register_lo(), "can't handle this"); + __ ld(base, offset + hi_word_offset_in_bytes, to_reg->as_register_lo()); + __ lduw(base, offset + lo_word_offset_in_bytes, O7); // in case O7 is base or offset, use it last + __ sllx(to_reg->as_register_lo(), 32, to_reg->as_register_lo()); + __ or3(to_reg->as_register_lo(), O7, to_reg->as_register_lo()); + } + break; + case T_METADATA: __ ld_ptr(base, offset, to_reg->as_register()); break; + case T_ADDRESS: + __ ld_ptr(base, offset, to_reg->as_register()); + break; + case T_ARRAY : // fall through + case T_OBJECT: + { + if (UseCompressedOops && !wide) { + __ lduw(base, offset, to_reg->as_register()); + __ decode_heap_oop(to_reg->as_register()); + } else { + __ ld_ptr(base, offset, to_reg->as_register()); + } + break; + } + case T_FLOAT: __ ldf(FloatRegisterImpl::S, base, offset, to_reg->as_float_reg()); break; + case T_DOUBLE: + { + FloatRegister reg = to_reg->as_double_reg(); + // split unaligned loads + if (unaligned || PatchALot) { + __ ldf(FloatRegisterImpl::S, base, offset + 4, reg->successor()); + __ ldf(FloatRegisterImpl::S, base, offset, reg); + } else { + __ ldf(FloatRegisterImpl::D, base, offset, 
to_reg->as_double_reg()); + } + break; + } + default : ShouldNotReachHere(); + } + if (is_reference_type(type)) { + __ verify_oop(to_reg->as_register()); + } + } + return load_offset; +} + + +int LIR_Assembler::load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide) { + int load_offset = code_offset(); + switch(type) { + case T_BOOLEAN: // fall through + case T_BYTE : __ ldsb(base, disp, to_reg->as_register()); break; + case T_CHAR : __ lduh(base, disp, to_reg->as_register()); break; + case T_SHORT : __ ldsh(base, disp, to_reg->as_register()); break; + case T_INT : __ ld(base, disp, to_reg->as_register()); break; + case T_ADDRESS: __ ld_ptr(base, disp, to_reg->as_register()); break; + case T_ARRAY : // fall through + case T_OBJECT: + { + if (UseCompressedOops && !wide) { + __ lduw(base, disp, to_reg->as_register()); + __ decode_heap_oop(to_reg->as_register()); + } else { + __ ld_ptr(base, disp, to_reg->as_register()); + } + break; + } + case T_FLOAT: __ ldf(FloatRegisterImpl::S, base, disp, to_reg->as_float_reg()); break; + case T_DOUBLE: __ ldf(FloatRegisterImpl::D, base, disp, to_reg->as_double_reg()); break; + case T_LONG : + __ ldx(base, disp, to_reg->as_register_lo()); + break; + default : ShouldNotReachHere(); + } + if (is_reference_type(type)) { + __ verify_oop(to_reg->as_register()); + } + return load_offset; +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_INT: + case T_FLOAT: { + Register src_reg = O7; + int value = c->as_jint_bits(); + if (value == 0) { + src_reg = G0; + } else { + __ set(value, O7); + } + Address addr = frame_map()->address_for_slot(dest->single_stack_ix()); + __ stw(src_reg, addr.base(), addr.disp()); + break; + } + case T_ADDRESS: { + Register src_reg = O7; + int value = c->as_jint_bits(); + if (value == 0) { + src_reg = G0; + } else { + __ set(value, O7); + } + Address addr = 
frame_map()->address_for_slot(dest->single_stack_ix()); + __ st_ptr(src_reg, addr.base(), addr.disp()); + break; + } + case T_OBJECT: { + Register src_reg = O7; + jobject2reg(c->as_jobject(), src_reg); + Address addr = frame_map()->address_for_slot(dest->single_stack_ix()); + __ st_ptr(src_reg, addr.base(), addr.disp()); + break; + } + case T_LONG: + case T_DOUBLE: { + Address addr = frame_map()->address_for_double_slot(dest->double_stack_ix()); + + Register tmp = O7; + int value_lo = c->as_jint_lo_bits(); + if (value_lo == 0) { + tmp = G0; + } else { + __ set(value_lo, O7); + } + __ stw(tmp, addr.base(), addr.disp() + lo_word_offset_in_bytes); + int value_hi = c->as_jint_hi_bits(); + if (value_hi == 0) { + tmp = G0; + } else { + tmp = O7; __ set(value_hi, O7); // reselect O7: tmp is still G0 when the low word was zero + } + __ stw(tmp, addr.base(), addr.disp() + hi_word_offset_in_bytes); + break; + } + default: + Unimplemented(); + } +} + + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* addr = dest->as_address_ptr(); + Register base = addr->base()->as_pointer_register(); + int offset = -1; + + switch (c->type()) { + case T_FLOAT: type = T_INT; // Float constants are stored by int store instructions. 
+ case T_INT: + case T_ADDRESS: { + LIR_Opr tmp = FrameMap::O7_opr; + int value = c->as_jint_bits(); + if (value == 0) { + tmp = FrameMap::G0_opr; + } else { // every non-zero constant must be materialized in O7, not only those that fit a simm13 + __ set(value, O7); + } + if (addr->index()->is_valid()) { + assert(addr->disp() == 0, "must be zero"); + offset = store(tmp, base, addr->index()->as_pointer_register(), type, wide); + } else { + assert(Assembler::is_simm13(addr->disp()), "can't handle larger addresses"); + offset = store(tmp, base, addr->disp(), type, wide, false); + } + break; + } + case T_LONG: + case T_DOUBLE: { + assert(!addr->index()->is_valid(), "can't handle reg reg address here"); + assert(Assembler::is_simm13(addr->disp()) && + Assembler::is_simm13(addr->disp() + 4), "can't handle larger addresses"); + + LIR_Opr tmp = FrameMap::O7_opr; + int value_lo = c->as_jint_lo_bits(); + if (value_lo == 0) { + tmp = FrameMap::G0_opr; + } else { + __ set(value_lo, O7); + } + offset = store(tmp, base, addr->disp() + lo_word_offset_in_bytes, T_INT, wide, false); + int value_hi = c->as_jint_hi_bits(); + if (value_hi == 0) { + tmp = FrameMap::G0_opr; + } else { + tmp = FrameMap::O7_opr; __ set(value_hi, O7); // reselect O7: tmp is still G0 when the low word was zero + } + store(tmp, base, addr->disp() + hi_word_offset_in_bytes, T_INT, wide, false); + break; + } + case T_OBJECT: { + jobject obj = c->as_jobject(); + LIR_Opr tmp; + if (obj == NULL) { + tmp = FrameMap::G0_opr; + } else { + tmp = FrameMap::O7_opr; + jobject2reg(c->as_jobject(), O7); + } + // handle either reg+reg or reg+disp address + if (addr->index()->is_valid()) { + assert(addr->disp() == 0, "must be zero"); + offset = store(tmp, base, addr->index()->as_pointer_register(), type, wide); + } else { + assert(Assembler::is_simm13(addr->disp()), "can't handle larger addresses"); + offset = store(tmp, base, addr->disp(), type, wide, false); + } + + break; + } + default: + Unimplemented(); + } + if (info != NULL) { + assert(offset != -1, "offset should've been set"); + add_debug_info_for_null_check(offset, info); + } +} + + +void 
LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + LIR_Const* c = src->as_constant_ptr(); + LIR_Opr to_reg = dest; + + switch (c->type()) { + case T_INT: + case T_ADDRESS: + { + jint con = c->as_jint(); + if (to_reg->is_single_cpu()) { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ set(con, to_reg->as_register()); + } else { + ShouldNotReachHere(); + assert(to_reg->is_single_fpu(), "wrong register kind"); + + __ set(con, O7); + Address temp_slot(SP, (frame::register_save_words * wordSize) + STACK_BIAS); + __ st(O7, temp_slot); + __ ldf(FloatRegisterImpl::S, temp_slot, to_reg->as_float_reg()); + } + } + break; + + case T_LONG: + { + jlong con = c->as_jlong(); + + if (to_reg->is_double_cpu()) { + __ set(con, to_reg->as_register_lo()); + } else if (to_reg->is_single_cpu()) { + __ set(con, to_reg->as_register()); + } else { + ShouldNotReachHere(); + assert(to_reg->is_double_fpu(), "wrong register kind"); + Address temp_slot_lo(SP, ((frame::register_save_words ) * wordSize) + STACK_BIAS); + Address temp_slot_hi(SP, ((frame::register_save_words) * wordSize) + (longSize/2) + STACK_BIAS); + __ set(low(con), O7); + __ st(O7, temp_slot_lo); + __ set(high(con), O7); + __ st(O7, temp_slot_hi); + __ ldf(FloatRegisterImpl::D, temp_slot_lo, to_reg->as_double_reg()); + } + } + break; + + case T_OBJECT: + { + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), to_reg->as_register()); + } else { + jobject2reg_with_patching(to_reg->as_register(), info); + } + } + break; + + case T_METADATA: + { + if (patch_code == lir_patch_none) { + metadata2reg(c->as_metadata(), to_reg->as_register()); + } else { + klass2reg_with_patching(to_reg->as_register(), info); + } + } + break; + + case T_FLOAT: + { + address const_addr = __ float_constant(c->as_jfloat()); + if (const_addr == NULL) { + bailout("const section overflow"); + break; + } + RelocationHolder rspec = 
internal_word_Relocation::spec(const_addr); + AddressLiteral const_addrlit(const_addr, rspec); + if (to_reg->is_single_fpu()) { + __ patchable_sethi(const_addrlit, O7); + __ relocate(rspec); + __ ldf(FloatRegisterImpl::S, O7, const_addrlit.low10(), to_reg->as_float_reg()); + + } else { + assert(to_reg->is_single_cpu(), "Must be a cpu register."); + + __ set(const_addrlit, O7); + __ ld(O7, 0, to_reg->as_register()); + } + } + break; + + case T_DOUBLE: + { + address const_addr = __ double_constant(c->as_jdouble()); + if (const_addr == NULL) { + bailout("const section overflow"); + break; + } + RelocationHolder rspec = internal_word_Relocation::spec(const_addr); + + if (to_reg->is_double_fpu()) { + AddressLiteral const_addrlit(const_addr, rspec); + __ patchable_sethi(const_addrlit, O7); + __ relocate(rspec); + __ ldf (FloatRegisterImpl::D, O7, const_addrlit.low10(), to_reg->as_double_reg()); + } else { + assert(to_reg->is_double_cpu(), "Must be a long register."); + __ set(jlong_cast(c->as_jdouble()), to_reg->as_register_lo()); + } + + } + break; + + default: + ShouldNotReachHere(); + } +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + Register reg = addr->base()->as_pointer_register(); + LIR_Opr index = addr->index(); + if (index->is_illegal()) { + return Address(reg, addr->disp()); + } else { + assert (addr->disp() == 0, "unsupported address mode"); + return Address(reg, index->as_pointer_register()); + } +} + + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + switch (type) { + case T_INT: + case T_FLOAT: { + Register tmp = O7; + Address from = frame_map()->address_for_slot(src->single_stack_ix()); + Address to = frame_map()->address_for_slot(dest->single_stack_ix()); + __ lduw(from.base(), from.disp(), tmp); + __ stw(tmp, to.base(), to.disp()); + break; + } + case T_ADDRESS: + case T_OBJECT: { + Register tmp = O7; + Address from = frame_map()->address_for_slot(src->single_stack_ix()); + Address to = 
frame_map()->address_for_slot(dest->single_stack_ix()); + __ ld_ptr(from.base(), from.disp(), tmp); + __ st_ptr(tmp, to.base(), to.disp()); + break; + } + case T_LONG: + case T_DOUBLE: { + Register tmp = O7; + Address from = frame_map()->address_for_double_slot(src->double_stack_ix()); + Address to = frame_map()->address_for_double_slot(dest->double_stack_ix()); + __ lduw(from.base(), from.disp(), tmp); + __ stw(tmp, to.base(), to.disp()); + __ lduw(from.base(), from.disp() + 4, tmp); + __ stw(tmp, to.base(), to.disp() + 4); + break; + } + + default: + ShouldNotReachHere(); + } +} + + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + Address base = as_Address(addr); + return Address(base.base(), base.disp() + hi_word_offset_in_bytes); +} + + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + Address base = as_Address(addr); + return Address(base.base(), base.disp() + lo_word_offset_in_bytes); +} + + +void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, + LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool unaligned) { + + assert(type != T_METADATA, "load of metadata ptr not supported"); + LIR_Address* addr = src_opr->as_address_ptr(); + LIR_Opr to_reg = dest; + + Register src = addr->base()->as_pointer_register(); + Register disp_reg = noreg; + int disp_value = addr->disp(); + bool needs_patching = (patch_code != lir_patch_none); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(src); + } + + PatchingStub* patch = NULL; + if (needs_patching) { + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + assert(!to_reg->is_double_cpu() || + patch_code == lir_patch_none || + patch_code == lir_patch_normal, "patching doesn't match register"); + } + + if (addr->index()->is_illegal()) { + if (!Assembler::is_simm13(disp_value) && (!unaligned || Assembler::is_simm13(disp_value + 4))) { + if (needs_patching) { + __ patchable_set(0, O7); + } else { + __ set(disp_value, O7); + } + disp_reg = O7; + } + } 
else if (unaligned || PatchALot) { + __ add(src, addr->index()->as_pointer_register(), O7); + src = O7; + } else { + disp_reg = addr->index()->as_pointer_register(); + assert(disp_value == 0, "can't handle 3 operand addresses"); + } + + // remember the offset of the load. The patching_epilog must be done + // before the call to add_debug_info, otherwise the PcDescs don't get + // entered in increasing order. + int offset = code_offset(); + + assert(disp_reg != noreg || Assembler::is_simm13(disp_value), "should have set this up"); + if (disp_reg == noreg) { + offset = load(src, disp_value, to_reg, type, wide, unaligned); + } else { + assert(!unaligned, "can't handle this"); + offset = load(src, disp_reg, to_reg, type, wide); + } + + if (patch != NULL) { + patching_epilog(patch, patch_code, src, info); + } + if (info != NULL) add_debug_info_for_null_check(offset, info); +} + + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + Address addr; + if (src->is_single_word()) { + addr = frame_map()->address_for_slot(src->single_stack_ix()); + } else if (src->is_double_word()) { + addr = frame_map()->address_for_double_slot(src->double_stack_ix()); + } + + bool unaligned = (addr.disp() - STACK_BIAS) % 8 != 0; + load(addr.base(), addr.disp(), dest, dest->type(), true /*wide*/, unaligned); +} + + +void LIR_Assembler::reg2stack(LIR_Opr from_reg, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + Address addr; + if (dest->is_single_word()) { + addr = frame_map()->address_for_slot(dest->single_stack_ix()); + } else if (dest->is_double_word()) { + addr = frame_map()->address_for_slot(dest->double_stack_ix()); + } + bool unaligned = (addr.disp() - STACK_BIAS) % 8 != 0; + store(from_reg, addr.base(), addr.disp(), from_reg->type(), true /*wide*/, unaligned); +} + + +void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) { + if (from_reg->is_float_kind() && to_reg->is_float_kind()) { + if (from_reg->is_double_fpu()) { + // double to double moves + 
assert(to_reg->is_double_fpu(), "should match"); + __ fmov(FloatRegisterImpl::D, from_reg->as_double_reg(), to_reg->as_double_reg()); + } else { + // float to float moves + assert(to_reg->is_single_fpu(), "should match"); + __ fmov(FloatRegisterImpl::S, from_reg->as_float_reg(), to_reg->as_float_reg()); + } + } else if (!from_reg->is_float_kind() && !to_reg->is_float_kind()) { + if (from_reg->is_double_cpu()) { + __ mov(from_reg->as_pointer_register(), to_reg->as_pointer_register()); + } else if (to_reg->is_double_cpu()) { + // int to int moves + __ mov(from_reg->as_register(), to_reg->as_register_lo()); + } else { + // int to int moves + __ mov(from_reg->as_register(), to_reg->as_register()); + } + } else { + ShouldNotReachHere(); + } + if (is_reference_type(to_reg->type())) { + __ verify_oop(to_reg->as_register()); + } +} + +void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type, + LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, + bool wide, bool unaligned) { + assert(type != T_METADATA, "store of metadata ptr not supported"); + LIR_Address* addr = dest->as_address_ptr(); + + Register src = addr->base()->as_pointer_register(); + Register disp_reg = noreg; + int disp_value = addr->disp(); + bool needs_patching = (patch_code != lir_patch_none); + + if (addr->base()->is_oop_register()) { + __ verify_oop(src); + } + + PatchingStub* patch = NULL; + if (needs_patching) { + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + assert(!from_reg->is_double_cpu() || + patch_code == lir_patch_none || + patch_code == lir_patch_normal, "patching doesn't match register"); + } + + if (addr->index()->is_illegal()) { + if (!Assembler::is_simm13(disp_value) && (!unaligned || Assembler::is_simm13(disp_value + 4))) { + if (needs_patching) { + __ patchable_set(0, O7); + } else { + __ set(disp_value, O7); + } + disp_reg = O7; + } + } else if (unaligned || PatchALot) { + __ add(src, addr->index()->as_pointer_register(), O7); + src = O7; 
+ } else { + disp_reg = addr->index()->as_pointer_register(); + assert(disp_value == 0, "can't handle 3 operand addresses"); + } + + // remember the offset of the store. The patching_epilog must be done + // before the call to add_debug_info_for_null_check, otherwise the PcDescs don't get + // entered in increasing order. + int offset; + + assert(disp_reg != noreg || Assembler::is_simm13(disp_value), "should have set this up"); + if (disp_reg == noreg) { + offset = store(from_reg, src, disp_value, type, wide, unaligned); + } else { + assert(!unaligned, "can't handle this"); + offset = store(from_reg, src, disp_reg, type, wide); + } + + if (patch != NULL) { + patching_epilog(patch, patch_code, src, info); + } + + if (info != NULL) add_debug_info_for_null_check(offset, info); +} + + +void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + __ ld_ptr(Address(G2_thread, JavaThread::polling_page_offset()), L0); + __ relocate(relocInfo::poll_return_type); + __ ld_ptr(L0, 0, G0); + __ ret(); + __ delayed()->restore(); +} + + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + __ ld_ptr(Address(G2_thread, JavaThread::polling_page_offset()), tmp->as_register()); + if (info != NULL) { + add_debug_info_for_branch(info); + } + int offset = __ offset(); + + __ relocate(relocInfo::poll_type); + __ ld_ptr(tmp->as_register(), 0, G0); + return offset; +} + + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); + return; + } + + int start = __ offset(); + __ relocate(static_stub_Relocation::spec(call_pc)); + + __ set_metadata(NULL, G5); + // must be set to -1 at code generation time + AddressLiteral addrlit(-1); + __ jump_to(addrlit, G3); + __ delayed()->nop(); + + assert(__ offset() - start <= 
call_stub_size(), "stub too big"); + __ end_a_stub(); +} + + +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + if (opr1->is_single_fpu()) { + __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, opr1->as_float_reg(), opr2->as_float_reg()); + } else if (opr1->is_double_fpu()) { + __ fcmp(FloatRegisterImpl::D, Assembler::fcc0, opr1->as_double_reg(), opr2->as_double_reg()); + } else if (opr1->is_single_cpu()) { + if (opr2->is_constant()) { + switch (opr2->as_constant_ptr()->type()) { + case T_INT: + { jint con = opr2->as_constant_ptr()->as_jint(); + if (Assembler::is_simm13(con)) { + __ cmp(opr1->as_register(), con); + } else { + __ set(con, O7); + __ cmp(opr1->as_register(), O7); + } + } + break; + + case T_OBJECT: + // there are only equal/notequal comparisons on objects + { jobject con = opr2->as_constant_ptr()->as_jobject(); + if (con == NULL) { + __ cmp(opr1->as_register(), 0); + } else { + jobject2reg(con, O7); + __ cmp(opr1->as_register(), O7); + } + } + break; + + case T_METADATA: + // We only need, for now, comparison with NULL for metadata. 
+ { assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "oops"); + Metadata* m = opr2->as_constant_ptr()->as_metadata(); + if (m == NULL) { + __ cmp(opr1->as_register(), 0); + } else { + ShouldNotReachHere(); + } + } + break; + + default: + ShouldNotReachHere(); + break; + } + } else { + if (opr2->is_address()) { + LIR_Address * addr = opr2->as_address_ptr(); + BasicType type = addr->type(); + if ( type == T_OBJECT ) __ ld_ptr(as_Address(addr), O7); + else __ ld(as_Address(addr), O7); + __ cmp(opr1->as_register(), O7); + } else { + __ cmp(opr1->as_register(), opr2->as_register()); + } + } + } else if (opr1->is_double_cpu()) { + Register xlo = opr1->as_register_lo(); + Register xhi = opr1->as_register_hi(); + if (opr2->is_constant() && opr2->as_jlong() == 0) { + assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "only handles these cases"); + __ orcc(xhi, G0, G0); + } else if (opr2->is_register()) { + Register ylo = opr2->as_register_lo(); + Register yhi = opr2->as_register_hi(); + __ cmp(xlo, ylo); + } else { + ShouldNotReachHere(); + } + } else if (opr1->is_address()) { + LIR_Address * addr = opr1->as_address_ptr(); + BasicType type = addr->type(); + assert (opr2->is_constant(), "Checking"); + if ( type == T_OBJECT ) __ ld_ptr(as_Address(addr), O7); + else __ ld(as_Address(addr), O7); + __ cmp(O7, opr2->as_constant_ptr()->as_jint()); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register()); + } else if (left->is_double_fpu()) { + __ float_cmp(false, is_unordered_less ? 
-1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register()); + } else { + ShouldNotReachHere(); + } + } else if (code == lir_cmp_l2i) { + __ lcmp(left->as_register_lo(), right->as_register_lo(), dst->as_register()); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { + Assembler::Condition acond; + switch (condition) { + case lir_cond_equal: acond = Assembler::equal; break; + case lir_cond_notEqual: acond = Assembler::notEqual; break; + case lir_cond_less: acond = Assembler::less; break; + case lir_cond_lessEqual: acond = Assembler::lessEqual; break; + case lir_cond_greaterEqual: acond = Assembler::greaterEqual; break; + case lir_cond_greater: acond = Assembler::greater; break; + case lir_cond_aboveEqual: acond = Assembler::greaterEqualUnsigned; break; + case lir_cond_belowEqual: acond = Assembler::lessEqualUnsigned; break; + default: ShouldNotReachHere(); + }; + + if (opr1->is_constant() && opr1->type() == T_INT) { + Register dest = result->as_register(); + // load up first part of constant before branch + // and do the rest in the delay slot. 
+ if (!Assembler::is_simm13(opr1->as_jint())) { + __ sethi(opr1->as_jint(), dest); + } + } else if (opr1->is_constant()) { + const2reg(opr1, result, lir_patch_none, NULL); + } else if (opr1->is_register()) { + reg2reg(opr1, result); + } else if (opr1->is_stack()) { + stack2reg(opr1, result, result->type()); + } else { + ShouldNotReachHere(); + } + Label skip; + if (type == T_INT) { + __ br(acond, false, Assembler::pt, skip); + } else { + __ brx(acond, false, Assembler::pt, skip); // checks icc on 32bit and xcc on 64bit + } + if (opr1->is_constant() && opr1->type() == T_INT) { + Register dest = result->as_register(); + if (Assembler::is_simm13(opr1->as_jint())) { + __ delayed()->or3(G0, opr1->as_jint(), dest); + } else { + // the sethi has been done above, so just put in the low 10 bits + __ delayed()->or3(dest, opr1->as_jint() & 0x3ff, dest); + } + } else { + // can't do anything useful in the delay slot + __ delayed()->nop(); + } + if (opr2->is_constant()) { + const2reg(opr2, result, lir_patch_none, NULL); + } else if (opr2->is_register()) { + reg2reg(opr2, result); + } else if (opr2->is_stack()) { + stack2reg(opr2, result, result->type()); + } else { + ShouldNotReachHere(); + } + __ bind(skip); +} + + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); + assert(left->is_register(), "wrong items state"); + assert(dest->is_register(), "wrong items state"); + + if (right->is_register()) { + if (dest->is_float_kind()) { + + FloatRegister lreg, rreg, res; + FloatRegisterImpl::Width w; + if (right->is_single_fpu()) { + w = FloatRegisterImpl::S; + lreg = left->as_float_reg(); + rreg = right->as_float_reg(); + res = dest->as_float_reg(); + } else { + w = FloatRegisterImpl::D; + lreg = left->as_double_reg(); + rreg = right->as_double_reg(); + res = dest->as_double_reg(); + } + + switch (code) { + case lir_add: __ fadd(w, lreg, rreg, res); 
break; + case lir_sub: __ fsub(w, lreg, rreg, res); break; + case lir_mul: __ fmul(w, lreg, rreg, res); break; + case lir_div: __ fdiv(w, lreg, rreg, res); break; + default: ShouldNotReachHere(); + } + + } else if (dest->is_double_cpu()) { + Register dst_lo = dest->as_register_lo(); + Register op1_lo = left->as_pointer_register(); + Register op2_lo = right->as_pointer_register(); + + switch (code) { + case lir_add: + __ add(op1_lo, op2_lo, dst_lo); + break; + + case lir_sub: + __ sub(op1_lo, op2_lo, dst_lo); + break; + + default: ShouldNotReachHere(); + } + } else { + assert (right->is_single_cpu(), "Just Checking"); + + Register lreg = left->as_register(); + Register res = dest->as_register(); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ add (lreg, rreg, res); break; + case lir_sub: __ sub (lreg, rreg, res); break; + case lir_mul: __ mulx (lreg, rreg, res); break; + default: ShouldNotReachHere(); + } + } + } else { + assert (right->is_constant(), "must be constant"); + + if (dest->is_single_cpu()) { + Register lreg = left->as_register(); + Register res = dest->as_register(); + int simm13 = right->as_constant_ptr()->as_jint(); + + switch (code) { + case lir_add: __ add (lreg, simm13, res); break; + case lir_sub: __ sub (lreg, simm13, res); break; + case lir_mul: __ mulx (lreg, simm13, res); break; + default: ShouldNotReachHere(); + } + } else { + Register lreg = left->as_pointer_register(); + Register res = dest->as_register_lo(); + long con = right->as_constant_ptr()->as_jlong(); + assert(Assembler::is_simm13(con), "must be simm13"); + + switch (code) { + case lir_add: __ add (lreg, (int)con, res); break; + case lir_sub: __ sub (lreg, (int)con, res); break; + case lir_mul: __ mulx (lreg, (int)con, res); break; + default: ShouldNotReachHere(); + } + } + } +} + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) { + switch (code) { + case lir_tan: { + assert(thread->is_valid(), 
"preserve the thread object for performance reasons"); + assert(dest->as_double_reg() == F0, "the result will be in f0/f1"); + break; + } + case lir_sqrt: { + assert(!thread->is_valid(), "there is no need for a thread_reg for dsqrt"); + FloatRegister src_reg = value->as_double_reg(); + FloatRegister dst_reg = dest->as_double_reg(); + __ fsqrt(FloatRegisterImpl::D, src_reg, dst_reg); + break; + } + case lir_abs: { + assert(!thread->is_valid(), "there is no need for a thread_reg for fabs"); + FloatRegister src_reg = value->as_double_reg(); + FloatRegister dst_reg = dest->as_double_reg(); + __ fabs(FloatRegisterImpl::D, src_reg, dst_reg); + break; + } + default: { + ShouldNotReachHere(); + break; + } + } +} + + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + if (right->is_constant()) { + if (dest->is_single_cpu()) { + int simm13 = right->as_constant_ptr()->as_jint(); + switch (code) { + case lir_logic_and: __ and3 (left->as_register(), simm13, dest->as_register()); break; + case lir_logic_or: __ or3 (left->as_register(), simm13, dest->as_register()); break; + case lir_logic_xor: __ xor3 (left->as_register(), simm13, dest->as_register()); break; + default: ShouldNotReachHere(); + } + } else { + long c = right->as_constant_ptr()->as_jlong(); + assert(c == (int)c && Assembler::is_simm13(c), "out of range"); + int simm13 = (int)c; + switch (code) { + case lir_logic_and: + __ and3 (left->as_register_lo(), simm13, dest->as_register_lo()); + break; + + case lir_logic_or: + __ or3 (left->as_register_lo(), simm13, dest->as_register_lo()); + break; + + case lir_logic_xor: + __ xor3 (left->as_register_lo(), simm13, dest->as_register_lo()); + break; + + default: ShouldNotReachHere(); + } + } + } else { + assert(right->is_register(), "right should be in register"); + + if (dest->is_single_cpu()) { + switch (code) { + case lir_logic_and: __ and3 (left->as_register(), right->as_register(), dest->as_register()); break; + case lir_logic_or: 
__ or3 (left->as_register(), right->as_register(), dest->as_register()); break; + case lir_logic_xor: __ xor3 (left->as_register(), right->as_register(), dest->as_register()); break; + default: ShouldNotReachHere(); + } + } else { + Register l = (left->is_single_cpu() && left->is_oop_register()) ? left->as_register() : + left->as_register_lo(); + Register r = (right->is_single_cpu() && right->is_oop_register()) ? right->as_register() : + right->as_register_lo(); + + switch (code) { + case lir_logic_and: __ and3 (l, r, dest->as_register_lo()); break; + case lir_logic_or: __ or3 (l, r, dest->as_register_lo()); break; + case lir_logic_xor: __ xor3 (l, r, dest->as_register_lo()); break; + default: ShouldNotReachHere(); + } + } + } +} + + +int LIR_Assembler::shift_amount(BasicType t) { + int elem_size = type2aelembytes(t); + switch (elem_size) { + case 1 : return 0; + case 2 : return 1; + case 4 : return 2; + case 8 : return 3; + } + ShouldNotReachHere(); + return -1; +} + + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == Oexception, "should match"); + assert(exceptionPC->as_register() == Oissuing_pc, "should match"); + + info->add_register_oop(exceptionOop); + + // reuse the debug info from the safepoint poll for the throw op itself + address pc_for_athrow = __ pc(); + int pc_for_athrow_offset = __ offset(); + RelocationHolder rspec = internal_word_Relocation::spec(pc_for_athrow); + __ set(pc_for_athrow, Oissuing_pc, rspec); + add_call_info(pc_for_athrow_offset, info); // for exception handler + + __ call(Runtime1::entry_for(Runtime1::handle_exception_id), relocInfo::runtime_call_type); + __ delayed()->nop(); +} + + +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == Oexception, "should match"); + + __ br(Assembler::always, false, Assembler::pt, _unwind_handler_entry); + __ delayed()->nop(); +} + +void 
LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + Register tmp2 = O7; + + int flags = op->flags(); + ciArrayKlass* default_type = op->expected_type(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + if (basic_type == T_ARRAY) basic_type = T_OBJECT; + + // higher 32bits must be null + __ sra(dst_pos, 0, dst_pos); + __ sra(src_pos, 0, src_pos); + __ sra(length, 0, length); + + // set up the arraycopy stub information + ArrayCopyStub* stub = op->stub(); + + // always do stub if no type information is available. it's ok if + // the known type isn't loaded since the code sanity checks + // in debug mode and the type isn't required when we know the exact type + // also check that the type is an array type. 
+ if (op->expected_type() == NULL) { + __ mov(src, O0); + __ mov(src_pos, O1); + __ mov(dst, O2); + __ mov(dst_pos, O3); + __ mov(length, O4); + address copyfunc_addr = StubRoutines::generic_arraycopy(); + assert(copyfunc_addr != NULL, "generic arraycopy stub required"); + +#ifndef PRODUCT + if (PrintC1Statistics) { + address counter = (address)&Runtime1::_generic_arraycopystub_cnt; + __ inc_counter(counter, G1, G3); + } +#endif + __ call_VM_leaf(tmp, copyfunc_addr); + + __ xor3(O0, -1, tmp); + __ sub(length, tmp, length); + __ add(src_pos, tmp, src_pos); + __ cmp_zero_and_br(Assembler::less, O0, *stub->entry()); + __ delayed()->add(dst_pos, tmp, dst_pos); + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point"); + + // make sure src and dst are non-null and load array length + if (flags & LIR_OpArrayCopy::src_null_check) { + __ tst(src); + __ brx(Assembler::equal, false, Assembler::pn, *stub->entry()); + __ delayed()->nop(); + } + + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ tst(dst); + __ brx(Assembler::equal, false, Assembler::pn, *stub->entry()); + __ delayed()->nop(); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
+ if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(dst, tmp); + __ lduw(tmp, in_bytes(Klass::layout_helper_offset()), tmp2); + __ cmp(tmp2, Klass::_lh_neutral_value); + __ br(Assembler::greaterEqual, false, Assembler::pn, *stub->entry()); + __ delayed()->nop(); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(src, tmp); + __ lduw(tmp, in_bytes(Klass::layout_helper_offset()), tmp2); + __ cmp(tmp2, Klass::_lh_neutral_value); + __ br(Assembler::greaterEqual, false, Assembler::pn, *stub->entry()); + __ delayed()->nop(); + } + } + + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + // test src_pos register + __ cmp_zero_and_br(Assembler::less, src_pos, *stub->entry()); + __ delayed()->nop(); + } + + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + // test dst_pos register + __ cmp_zero_and_br(Assembler::less, dst_pos, *stub->entry()); + __ delayed()->nop(); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + // make sure length isn't negative + __ cmp_zero_and_br(Assembler::less, length, *stub->entry()); + __ delayed()->nop(); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ ld(src, arrayOopDesc::length_offset_in_bytes(), tmp2); + __ add(length, src_pos, tmp); + __ cmp(tmp2, tmp); + __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry()); + __ delayed()->nop(); + } + + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ ld(dst, arrayOopDesc::length_offset_in_bytes(), tmp2); + __ add(length, dst_pos, tmp); + __ cmp(tmp2, tmp); + __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry()); + __ delayed()->nop(); + } + + int shift = shift_amount(basic_type); + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if (UseCompressedClassPointers) { + // We don't need decode because we just need 
to compare + __ lduw(src, oopDesc::klass_offset_in_bytes(), tmp); + __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2); + __ cmp(tmp, tmp2); + __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry()); + } else { + __ ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp); + __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2); + __ cmp(tmp, tmp2); + __ brx(Assembler::notEqual, false, Assembler::pt, *stub->entry()); + } + __ delayed()->nop(); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + + Label cont, slow; + assert_different_registers(tmp, tmp2, G3, G1); + + __ load_klass(src, G3); + __ load_klass(dst, G1); + + __ check_klass_subtype_fast_path(G3, G1, tmp, tmp2, &cont, copyfunc_addr == NULL ? stub->entry() : &slow, NULL); + + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ delayed()->nop(); + + __ cmp(G3, 0); + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. + __ br(Assembler::notEqual, false, Assembler::pt, cont); + __ delayed()->nop(); + + __ bind(slow); + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. 
+ assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(src, tmp); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(dst, tmp); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + + __ lduw(tmp, lh_offset, tmp2); + + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ set(objArray_lh, tmp); + __ cmp(tmp, tmp2); + __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry()); + __ delayed()->nop(); + } + + Register src_ptr = O0; + Register dst_ptr = O1; + Register len = O2; + Register chk_off = O3; + Register super_k = O4; + + __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr); + if (shift == 0) { + __ add(src_ptr, src_pos, src_ptr); + } else { + __ sll(src_pos, shift, tmp); + __ add(src_ptr, tmp, src_ptr); + } + + __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr); + if (shift == 0) { + __ add(dst_ptr, dst_pos, dst_ptr); + } else { + __ sll(dst_pos, shift, tmp); + __ add(dst_ptr, tmp, dst_ptr); + } + __ mov(length, len); + __ load_klass(dst, tmp); + + int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); + __ ld_ptr(tmp, ek_offset, super_k); + + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ lduw(super_k, sco_offset, chk_off); + + __ call_VM_leaf(tmp, copyfunc_addr); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ br_notnull_short(O0, Assembler::pn, failed); + __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, G1, G3); + __ bind(failed); + } +#endif + + __ br_null(O0, false, Assembler::pt, *stub->continuation()); + __ delayed()->xor3(O0, -1, tmp); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ inc_counter((address)&Runtime1::_arraycopy_checkcast_attempt_cnt, G1, G3); + } +#endif + + __ sub(length, tmp, length); + __ add(src_pos, tmp, src_pos); + __ br(Assembler::always, false, Assembler::pt, *stub->entry()); + __ 
delayed()->add(dst_pos, tmp, dst_pos); + + __ bind(cont); + } else { + __ br(Assembler::equal, false, Assembler::pn, *stub->entry()); + __ delayed()->nop(); + __ bind(cont); + } + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. + Label known_ok, halt; + metadata2reg(op->expected_type()->constant_encoding(), tmp); + if (UseCompressedClassPointers) { + // tmp holds the default type. It currently comes uncompressed after the + // load of a constant, so encode it. + __ encode_klass_not_null(tmp); + // load the raw value of the dst klass, since we will be comparing + // uncompressed values directly. + __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2); + if (basic_type != T_OBJECT) { + __ cmp(tmp, tmp2); + __ br(Assembler::notEqual, false, Assembler::pn, halt); + // load the raw value of the src klass. 
+ __ delayed()->lduw(src, oopDesc::klass_offset_in_bytes(), tmp2); + __ cmp_and_br_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok); + } else { + __ cmp(tmp, tmp2); + __ br(Assembler::equal, false, Assembler::pn, known_ok); + __ delayed()->cmp(src, dst); + __ brx(Assembler::equal, false, Assembler::pn, known_ok); + __ delayed()->nop(); + } + } else { + __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2); + if (basic_type != T_OBJECT) { + __ cmp(tmp, tmp2); + __ brx(Assembler::notEqual, false, Assembler::pn, halt); + __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp2); + __ cmp_and_brx_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok); + } else { + __ cmp(tmp, tmp2); + __ brx(Assembler::equal, false, Assembler::pn, known_ok); + __ delayed()->cmp(src, dst); + __ brx(Assembler::equal, false, Assembler::pn, known_ok); + __ delayed()->nop(); + } + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + address counter = Runtime1::arraycopy_count_address(basic_type); + __ inc_counter(counter, G1, G3); + } +#endif + + Register src_ptr = O0; + Register dst_ptr = O1; + Register len = O2; + + __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr); + if (shift == 0) { + __ add(src_ptr, src_pos, src_ptr); + } else { + __ sll(src_pos, shift, tmp); + __ add(src_ptr, tmp, src_ptr); + } + + __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr); + if (shift == 0) { + __ add(dst_ptr, dst_pos, dst_ptr); + } else { + __ sll(dst_pos, shift, tmp); + __ add(dst_ptr, tmp, dst_ptr); + } + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + // arraycopy stubs takes a length in number of elements, so don't scale it. 
+ __ mov(length, len); + __ call_VM_leaf(tmp, entry); + + __ bind(*stub->continuation()); +} + +/* Emits a shift of left by the amount held in the count register into dest; code selects lir_shl, lir_shr or lir_ushr. Single-register T_OBJECT operands and double-register operands use the 64-bit sllx/srax/srlx forms, other single-register operands use the 32-bit sll/sra/srl forms. The tmp operand is not used. Any other code is ShouldNotReachHere. */ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + if (dest->is_single_cpu()) { + if (left->type() == T_OBJECT) { + switch (code) { + case lir_shl: __ sllx (left->as_register(), count->as_register(), dest->as_register()); break; + case lir_shr: __ srax (left->as_register(), count->as_register(), dest->as_register()); break; + case lir_ushr: __ srlx (left->as_register(), count->as_register(), dest->as_register()); break; /* 64-bit logical shift, consistent with sllx/srax above and with the srlx used for T_OBJECT in the constant-count overload; this case previously emitted the 32-bit srl */ + default: ShouldNotReachHere(); + } + } else + switch (code) { + case lir_shl: __ sll (left->as_register(), count->as_register(), dest->as_register()); break; + case lir_shr: __ sra (left->as_register(), count->as_register(), dest->as_register()); break; + case lir_ushr: __ srl (left->as_register(), count->as_register(), dest->as_register()); break; + default: ShouldNotReachHere(); + } + } else { + switch (code) { + case lir_shl: __ sllx (left->as_register_lo(), count->as_register(), dest->as_register_lo()); break; + case lir_shr: __ srax (left->as_register_lo(), count->as_register(), dest->as_register_lo()); break; + case lir_ushr: __ srlx (left->as_register_lo(), count->as_register(), dest->as_register_lo()); break; + default: ShouldNotReachHere(); + } + } +} + +/* Emits a shift of left by the constant count into dest. T_OBJECT operands are handled first: count is masked to 6 bits and the 64-bit sllx/srax/srlx forms are used. Otherwise a single-register dest masks count to 5 bits and uses sll/sra/srl, a double-register dest masks count to 6 bits and uses sllx/srax/srlx, and any other dest is ShouldNotReachHere. */ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + if (left->type() == T_OBJECT) { + count = count & 63; // shouldn't shift by more than sizeof(intptr_t) + Register l = left->as_register(); + Register d = dest->as_register_lo(); + switch (code) { + case lir_shl: __ sllx (l, count, d); break; + case lir_shr: __ srax (l, count, d); break; + case lir_ushr: __ srlx (l, count, d); break; + default: ShouldNotReachHere(); + } + return; + } + + if (dest->is_single_cpu()) { + count = count & 0x1F; // Java spec + switch (code) { + case lir_shl: __ sll (left->as_register(), count, dest->as_register()); break; + 
case lir_shr: __ sra (left->as_register(), count, dest->as_register()); break; + case lir_ushr: __ srl (left->as_register(), count, dest->as_register()); break; + default: ShouldNotReachHere(); + } + } else if (dest->is_double_cpu()) { + count = count & 63; // Java spec + switch (code) { + case lir_shl: __ sllx (left->as_pointer_register(), count, dest->as_pointer_register()); break; + case lir_shr: __ srax (left->as_pointer_register(), count, dest->as_pointer_register()); break; + case lir_ushr: __ srlx (left->as_pointer_register(), count, dest->as_pointer_register()); break; + default: ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +/* Emits inline allocation of an instance. The operand registers are asserted to be fixed: tmp1=G1, tmp2=G3, tmp3=G4, obj=O0, klass=G5. When op->init_check() is set, the init state byte of the klass is loaded into tmp1 and the stub entry is taken unless it equals InstanceKlass::fully_initialized. allocate_object receives the stub entry as its last argument; the stub continuation is bound after it and the resulting oop is verified. */ +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + assert(op->tmp1()->as_register() == G1 && + op->tmp2()->as_register() == G3 && + op->tmp3()->as_register() == G4 && + op->obj()->as_register() == O0 && + op->klass()->as_register() == G5, "must be"); + if (op->init_check()) { + add_debug_info_for_null_check_here(op->stub()->info()); + __ ldub(op->klass()->as_register(), + in_bytes(InstanceKlass::init_state_offset()), + op->tmp1()->as_register()); + __ cmp(op->tmp1()->as_register(), InstanceKlass::fully_initialized); + __ br(Assembler::notEqual, false, Assembler::pn, *op->stub()->entry()); + __ delayed()->nop(); + } + __ allocate_object(op->obj()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->tmp3()->as_register(), + op->header_size(), + op->object_size(), + op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); + __ verify_oop(op->obj()->as_register()); +} + +/* Emits array allocation. The operand registers are asserted to be fixed: tmp1=G1, tmp2=G3, tmp3=G4, tmp4=O1, klass=G5. The length register is sign-extended first. If UseSlowPath is set, or the fast path for this element kind is disabled (UseFastNewObjectArray for reference types, UseFastNewTypeArray otherwise), an unconditional branch to the stub is emitted; otherwise allocate_array is emitted with the stub entry as its last argument. The stub continuation is bound at the end. */ +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + assert(op->tmp1()->as_register() == G1 && + op->tmp2()->as_register() == G3 && + op->tmp3()->as_register() == G4 && + op->tmp4()->as_register() == O1 && + op->klass()->as_register() == G5, "must be"); + + __ signx(op->len()->as_register()); + if (UseSlowPath || + (!UseFastNewObjectArray && 
is_reference_type(op->type())) || + (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ br(Assembler::always, false, Assembler::pt, *op->stub()->entry()); + __ delayed()->nop(); + } else { + __ allocate_array(op->obj()->as_register(), + op->len()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->tmp3()->as_register(), + arrayOopDesc::header_size(op->type()), + type2aelembytes(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +/* Emits the receiver-type row update for the profile entry data, addressed relative to mdo with every slot offset reduced by mdo_offset_bias. First pass over VirtualCallData::row_limit() rows: if the stored receiver equals recv, increment that row count and branch to update_done. Second pass: store recv into the first row whose receiver slot is null, set its count to DataLayout::counter_increment and branch to update_done. Emitted code falls through when no row matched and no row was empty. tmp1 is overwritten. */ +void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias, + ciMethodData *md, ciProfileData *data, + Register recv, Register tmp1, Label* update_done) { + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. + Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - + mdo_offset_bias); + __ ld_ptr(receiver_addr, tmp1); + __ verify_klass_ptr(tmp1); + __ cmp_and_brx_short(recv, tmp1, Assembler::notEqual, Assembler::pt, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - + mdo_offset_bias); + __ ld_ptr(data_addr, tmp1); + __ add(tmp1, DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, data_addr); + __ ba(*update_done); + __ delayed()->nop(); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (i = 0; i < VirtualCallData::row_limit(); i++) { + Label next_test; + Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - + mdo_offset_bias); + __ ld_ptr(recv_addr, tmp1); + __ br_notnull_short(tmp1, Assembler::pt, next_test); + __ st_ptr(recv, recv_addr); + __ set(DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - + mdo_offset_bias); + __ ba(*update_done); + __ delayed()->nop(); + __ 
bind(next_test); + } +} + +/* Looks up the method data of method and the profile entry at bci and returns them through md and data; both are asserted non-null and data must be ReceiverTypeData. mdo_offset_bias is set to the header offset of the entry only when header offset plus entry size is not simm13-encodable; otherwise it is left as passed in (the callers in this file initialise it to 0). Emits no code. */ +void LIR_Assembler::setup_md_access(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) { + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for checkcast"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + if (!Assembler::is_simm13(md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes())) { + // The offset is large so bias the mdo by the base of the slot so + // that the ld can use simm13s to reference the slots of the data + mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset()); + } +} +/* Emits the type test used by the checkcast and instanceof cases of emit_opTypeCheck. Emitted code reaches obj_is_null for a null object, success when the class of the object passes the test against op->klass(), and failure otherwise. When op->should_profile() is set the test is wrapped with profile updates: the null-seen flag byte on the null path, type_profile_helper on the success path, and a decrement of the entry counter on the failure path. If obj aliases tmp1, the roles of tmp1 and tmp2 are swapped. */ +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { + // we always need a stub for the failure case. + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + ciKlass* k = op->klass(); + + + if (obj == k_RInfo) { + k_RInfo = klass_RInfo; + klass_RInfo = obj; + } + + ciMethodData* md; + ciProfileData* data; + int mdo_offset_bias = 0; + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias); + + Label not_null; + __ br_notnull_short(obj, Assembler::pn, not_null); + Register mdo = k_RInfo; + Register data_val = Rtmp1; + metadata2reg(md->constant_encoding(), mdo); + if (mdo_offset_bias > 0) { + __ set(mdo_offset_bias, data_val); + __ add(mdo, data_val, mdo); + } + Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias); + __ ldub(flags_addr, data_val); + __ or3(data_val, 
BitData::null_seen_byte_constant(), data_val); + __ stb(data_val, flags_addr); + __ ba(*obj_is_null); + __ delayed()->nop(); + __ bind(not_null); + } else { + __ br_null(obj, false, Assembler::pn, *obj_is_null); + __ delayed()->nop(); + } + + Label profile_cast_failure, profile_cast_success; + Label *failure_target = op->should_profile() ? &profile_cast_failure : failure; + Label *success_target = op->should_profile() ? &profile_cast_success : success; + + // patching may screw with our temporaries on sparc, + // so let's do it before loading the class + if (k->is_loaded()) { + metadata2reg(k->constant_encoding(), k_RInfo); + } else { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } + assert(obj != k_RInfo, "must be different"); + + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(obj, klass_RInfo); + if (op->fast_check()) { + assert_different_registers(klass_RInfo, k_RInfo); + __ cmp(k_RInfo, klass_RInfo); + __ brx(Assembler::notEqual, false, Assembler::pt, *failure_target); + __ delayed()->nop(); + } else { + bool need_slow_path = true; + if (k->is_loaded()) { + if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset())) + need_slow_path = false; + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg, + (need_slow_path ? 
success_target : NULL), + failure_target, NULL, + RegisterOrConstant(k->super_check_offset())); + } else { + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, + failure_target, NULL); + } + if (need_slow_path) { + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ cmp(G3, 0); + __ br(Assembler::equal, false, Assembler::pn, *failure_target); + __ delayed()->nop(); + // Fall through to success case + } + } + + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1; + assert_different_registers(obj, mdo, recv, tmp1); + __ bind(profile_cast_success); + metadata2reg(md->constant_encoding(), mdo); + if (mdo_offset_bias > 0) { + __ set(mdo_offset_bias, tmp1); + __ add(mdo, tmp1, mdo); + } + __ load_klass(obj, recv); + type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, success); + // Jump over the failure case + __ ba(*success); + __ delayed()->nop(); + // Cast failure case + __ bind(profile_cast_failure); + metadata2reg(md->constant_encoding(), mdo); + if (mdo_offset_bias > 0) { + __ set(mdo_offset_bias, tmp1); + __ add(mdo, tmp1, mdo); + } + Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias); + __ ld_ptr(data_addr, tmp1); + __ sub(tmp1, DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, data_addr); + __ ba(*failure); + __ delayed()->nop(); + } + __ ba(*success); + __ delayed()->nop(); +} +/* Dispatches on op->code(). lir_store_check emits the array store check inline: a null value skips the check, otherwise the class of the value is tested against the element klass of the array, with failures going to the stub entry and optional profiling around the test. lir_checkcast runs emit_typecheck_helper with the stub entry as failure target and moves obj to dst on success or null. lir_instanceof runs emit_typecheck_helper and sets dst to 1 on success and to 0 on failure or null. Any other code is ShouldNotReachHere. */ +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = 
op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + + __ verify_oop(value); + CodeStub* stub = op->stub(); + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + int mdo_offset_bias = 0; + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = op->should_profile() ? &profile_cast_success : &done; + Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry(); + + if (op->should_profile()) { + Label not_null; + __ br_notnull_short(value, Assembler::pn, not_null); + Register mdo = k_RInfo; + Register data_val = Rtmp1; + metadata2reg(md->constant_encoding(), mdo); + if (mdo_offset_bias > 0) { + __ set(mdo_offset_bias, data_val); + __ add(mdo, data_val, mdo); + } + Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias); + __ ldub(flags_addr, data_val); + __ or3(data_val, BitData::null_seen_byte_constant(), data_val); + __ stb(data_val, flags_addr); + __ ba_short(done); + __ bind(not_null); + } else { + __ br_null_short(value, Assembler::pn, done); + } + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(array, k_RInfo); + __ load_klass(value, klass_RInfo); + + // get instance klass + __ ld_ptr(Address(k_RInfo, ObjArrayKlass::element_klass_offset()), k_RInfo); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL); + + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ cmp(G3, 0); + __ 
br(Assembler::equal, false, Assembler::pn, *failure_target); + __ delayed()->nop(); + // fall through to the success case + + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1; + assert_different_registers(value, mdo, recv, tmp1); + __ bind(profile_cast_success); + metadata2reg(md->constant_encoding(), mdo); + if (mdo_offset_bias > 0) { + __ set(mdo_offset_bias, tmp1); + __ add(mdo, tmp1, mdo); + } + __ load_klass(value, recv); + type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &done); + __ ba_short(done); + // Cast failure case + __ bind(profile_cast_failure); + metadata2reg(md->constant_encoding(), mdo); + if (mdo_offset_bias > 0) { + __ set(mdo_offset_bias, tmp1); + __ add(mdo, tmp1, mdo); + } + Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias); + __ ld_ptr(data_addr, tmp1); + __ sub(tmp1, DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, data_addr); + __ ba(*stub->entry()); + __ delayed()->nop(); + } + __ bind(done); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + __ mov(obj, dst); + } else if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success, failure, done; + emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ set(0, dst); + __ ba_short(done); + __ bind(success); + __ set(1, dst); + __ bind(done); + } else { + ShouldNotReachHere(); + } + +} + +/* Emits a compare-and-swap on the address in op->addr(). lir_cas_long uses casx on the low registers of the compare and new values; lir_cas_int uses cas; lir_cas_obj uses cas on encoded oops when UseCompressedOops is set and cas_ptr otherwise. The compare and new values are first copied into tmp1 and tmp2, and a final cmp of the two temporaries sets the condition codes that report whether the swap happened. Any other code is Unimplemented. */ +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + if (op->code() == lir_cas_long) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register addr = op->addr()->as_pointer_register(); + Register cmp_value_lo = op->cmp_value()->as_register_lo(); + Register cmp_value_hi = 
op->cmp_value()->as_register_hi(); + Register new_value_lo = op->new_value()->as_register_lo(); + Register new_value_hi = op->new_value()->as_register_hi(); + Register t1 = op->tmp1()->as_register(); + Register t2 = op->tmp2()->as_register(); + __ mov(cmp_value_lo, t1); + __ mov(new_value_lo, t2); + // perform the compare and swap operation + __ casx(addr, t1, t2); + // generate condition code - if the swap succeeded, t2 ("new value" reg) was + // overwritten with the original value in "addr" and will be equal to t1. + __ cmp(t1, t2); + } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj) { + Register addr = op->addr()->as_pointer_register(); + Register cmp_value = op->cmp_value()->as_register(); + Register new_value = op->new_value()->as_register(); + Register t1 = op->tmp1()->as_register(); + Register t2 = op->tmp2()->as_register(); + __ mov(cmp_value, t1); + __ mov(new_value, t2); + if (op->code() == lir_cas_obj) { + if (UseCompressedOops) { + __ encode_heap_oop(t1); + __ encode_heap_oop(t2); + __ cas(addr, t1, t2); + } else { + __ cas_ptr(addr, t1, t2); + } + } else { + __ cas(addr, t1, t2); + } + __ cmp(t1, t2); + } else { + Unimplemented(); + } +} +/* Emits a breakpoint trap via breakpoint_trap(). */ +void LIR_Assembler::breakpoint() { + __ breakpoint_trap(); +} + +/* Not implemented in this file; calls Unimplemented(). */ +void LIR_Assembler::push(LIR_Opr opr) { + Unimplemented(); +} + +/* Not implemented in this file; calls Unimplemented(). */ +void LIR_Assembler::pop(LIR_Opr opr) { + Unimplemented(); +} + +/* Emits code that places in dst_opr the address that frame_map()->address_for_monitor_lock(monitor_no) describes: base register plus displacement. The displacement is added as an immediate when the address is simm13-encodable; otherwise it is first materialised in dst with set and then added to the base. */ +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) { + Address mon_addr = frame_map()->address_for_monitor_lock(monitor_no); + Register dst = dst_opr->as_register(); + Register reg = mon_addr.base(); + int offset = mon_addr.disp(); + // compute pointer to BasicLock + if (mon_addr.is_simm13()) { + __ add(reg, offset, dst); + } else { + __ set(offset, dst); + __ add(dst, reg, dst); + } +} +/* Emits a CRC32 update of op->crc() with the byte in op->val(). The sequence is: load the CRC table address, not1 on crc, clruwu on crc, update_byte_crc32 with val and the table, not1 again, then move crc to the result. crc, val and result must all be single CPU registers; the table pointer and the final result use the same result register. */ +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + 
assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register table = op->result_opr()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, table); + + __ set(ExternalAddress(StubRoutines::crc_table_addr()), table); + __ not1(crc); + __ clruwu(crc); + __ update_byte_crc32(crc, val, table); + __ not1(crc); + + __ mov(crc, res); +} +/* Emits monitor enter (lir_lock) or monitor exit (lir_unlock; any other code fails the assert). With UseFastLocking the inline lock_object or unlock_object sequence is emitted with the stub entry as its slow-path target, and for lir_lock debug info for an implicit null check is added when op->info() is non-null. Without UseFastLocking an unconditional branch to the stub entry is emitted. The stub continuation is bound at the end in every case. */ +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + + // obj may not be an oop + if (op->code() == lir_lock) { + MonitorEnterStub* stub = (MonitorEnterStub*)op->stub(); + if (UseFastLocking) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + if (op->info() != NULL) { + add_debug_info_for_null_check_here(op->info()); + } + __ lock_object(hdr, obj, lock, op->scratch_opr()->as_register(), *op->stub()->entry()); + } else { + // always do slow locking + // note: the slow locking code could be inlined here, however if we use + // slow locking, speed doesn't matter anyway and this solution is + // simpler and requires less duplicated code - additionally, the + // slow locking code is the same in either case which simplifies + // debugging + __ br(Assembler::always, false, Assembler::pt, *op->stub()->entry()); + __ delayed()->nop(); + } + } else { + assert (op->code() == lir_unlock, "Invalid code, expected lir_unlock"); + if (UseFastLocking) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + // always do slow unlocking + // note: the slow unlocking code could be inlined here, however if we use 
+ // slow unlocking, speed doesn't matter anyway and this solution is + // simpler and requires less duplicated code - additionally, the + // slow unlocking code is the same in either case which simplifies + // debugging + __ br(Assembler::always, false, Assembler::pt, *op->stub()->entry()); + __ delayed()->nop(); + } + } + __ bind(*op->stub()->continuation()); +} +/* Emits a load of the klass field of obj into result: a 32-bit lduw followed by decode_klass_not_null when UseCompressedClassPointers is set, a pointer-sized ld_ptr otherwise. When op->info() is non-null, add_debug_info_for_null_check_here is called before the load is emitted. */ +void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { + Register obj = op->obj()->as_pointer_register(); + Register result = op->result_opr()->as_pointer_register(); + + CodeEmitInfo* info = op->info(); + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + + if (UseCompressedClassPointers) { + __ lduw(obj, oopDesc::klass_offset_in_bytes(), result); + __ decode_klass_not_null(result); + } else { + __ ld_ptr(obj, oopDesc::klass_offset_in_bytes(), result); + } +} +/* Emits the call-profile update for the call at op->profiled_bci() of op->profiled_method(). The method data pointer is loaded into op->mdo() and, when the counter offset plus entry size is not simm13-encodable, biased using O7 as scratch. With op->should_profile_receiver_type(): if the holder is known and C1OptimizeVirtualCallProfiling is set, the count of the matching row (or else the receiver and count of the first empty row) is updated with no runtime test, and nothing is emitted when neither exists; otherwise the klass of recv is loaded into recv, type_profile_helper is emitted, and the total counter is incremented on its fall-through path. Without receiver profiling the call counter is incremented. */ +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated"); + Register tmp1 = op->tmp1()->as_register_lo(); + metadata2reg(md->constant_encoding(), mdo); + int mdo_offset_bias = 0; + if (!Assembler::is_simm13(md->byte_offset_of_slot(data, CounterData::count_offset()) + + data->size_in_bytes())) { + // The offset is large so bias the mdo by the base of the slot so + // that the ld can use simm13s to reference the slots of the data + mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset()); + __ set(mdo_offset_bias, O7); + __ add(mdo, O7, mdo); + } + + Address 
counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if (op->should_profile_receiver_type()) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, tmp1, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, + VirtualCallData::receiver_count_offset(i)) - + mdo_offset_bias); + __ ld_ptr(data_addr, tmp1); + __ add(tmp1, DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, data_addr); + return; + } + } + + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == NULL) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) - + mdo_offset_bias); + metadata2reg(known_klass->constant_encoding(), tmp1); + __ st_ptr(tmp1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - 
+ mdo_offset_bias); + __ ld_ptr(data_addr, tmp1); + __ add(tmp1, DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, data_addr); + return; + } + } + } else { + __ load_klass(recv, recv); + Label update_done; + type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + __ ld_ptr(counter_addr, tmp1); + __ add(tmp1, DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, counter_addr); + + __ bind(update_done); + } + } else { + // Static call + __ ld_ptr(counter_addr, tmp1); + __ add(tmp1, DataLayout::counter_increment, tmp1); + __ st_ptr(tmp1, counter_addr); + } +} +/* Emits the update of the type-profile cell addressed by op->mdp() for the value in op->obj(). What is emitted depends on op->not_null(), op->exact_klass(), op->current_klass() and op->no_conflict(): a null value sets TypeEntries::null_seen unless current_klass already records it; a non-null value either leaves the cell alone when its klass is already recorded, stores the klass when the cell holds none, or sets TypeEntries::type_unknown when a different klass is recorded. obj is copied into op->tmp() first, and G1 is used as a second scratch register. */ +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + Register obj = op->obj()->as_register(); + Register tmp1 = op->tmp()->as_pointer_register(); + Register tmp2 = G1; + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + + __ verify_oop(obj); + + if (tmp1 != obj) { + __ mov(obj, tmp1); + } + if (do_null) { + __ br_notnull_short(tmp1, Assembler::pt, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ld_ptr(mdo_addr, tmp1); + __ or3(tmp1, TypeEntries::null_seen, tmp1); + __ st_ptr(tmp1, mdo_addr); + } + if (do_update) { + __ ba(next); + __ delayed()->nop(); + } +#ifdef ASSERT + } else { + __ br_notnull_short(tmp1, Assembler::pt, update); + __ 
stop("unexpect null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp1, tmp1); + metadata2reg(exact_klass->constant_encoding(), tmp2); + __ cmp_and_br_short(tmp1, tmp2, Assembler::equal, Assembler::pt, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + + Label do_update; + __ ld_ptr(mdo_addr, tmp2); + + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + } else { + __ load_klass(tmp1, tmp1); + } + + __ xor3(tmp1, tmp2, tmp1); + __ btst(TypeEntries::type_klass_mask, tmp1); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ brx(Assembler::zero, false, Assembler::pt, next); + __ delayed()-> + + btst(TypeEntries::type_unknown, tmp1); + // already unknown. Nothing to do anymore. + __ brx(Assembler::notZero, false, Assembler::pt, next); + + if (TypeEntries::is_type_none(current_klass)) { + __ delayed()->btst(TypeEntries::type_mask, tmp2); + __ brx(Assembler::zero, true, Assembler::pt, do_update); + // first time here. Set profile type. + __ delayed()->or3(tmp2, tmp1, tmp2); + } else { + __ delayed()->nop(); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ btst(TypeEntries::type_unknown, tmp2); + // already unknown. Nothing to do anymore. + __ brx(Assembler::notZero, false, Assembler::pt, next); + __ delayed()->nop(); + } + + // different than before. Cannot keep accurate profile. 
+ __ or3(tmp2, TypeEntries::type_unknown, tmp2); + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + __ xor3(tmp1, tmp2, tmp1); + __ btst(TypeEntries::type_klass_mask, tmp1); + __ brx(Assembler::zero, false, Assembler::pt, next); +#ifdef ASSERT + + { + Label ok; + __ delayed()->btst(TypeEntries::type_mask, tmp2); + __ brx(Assembler::zero, true, Assembler::pt, ok); + __ delayed()->nop(); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } + // first time here. Set profile type. + __ or3(tmp2, tmp1, tmp2); +#else + // first time here. Set profile type. + __ delayed()->or3(tmp2, tmp1, tmp2); +#endif + + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + // already unknown. Nothing to do anymore. + __ btst(TypeEntries::type_unknown, tmp2); + __ brx(Assembler::notZero, false, Assembler::pt, next); + __ delayed()->or3(tmp2, TypeEntries::type_unknown, tmp2); + } + } + + __ bind(do_update); + __ st_ptr(tmp2, mdo_addr); + + __ bind(next); + } +} + +void LIR_Assembler::align_backward_branch_target() { + __ align(OptoLoopAlignment); +} + + +void LIR_Assembler::emit_delay(LIR_OpDelay* op) { + // make sure we are expecting a delay + // this has the side effect of clearing the delay state + // so we can use _masm instead of _masm->delayed() to do the + // code generation. 
+  __ delayed();
+
+  // make sure we only emit one instruction
+  int offset = code_offset();
+  op->delay_op()->emit_code(this);
+#ifdef ASSERT
+  if (code_offset() - offset != NativeInstruction::nop_instruction_size) {
+    op->delay_op()->print();
+  }
+  assert(code_offset() - offset == NativeInstruction::nop_instruction_size,
+         "only one instruction can go in a delay slot");
+#endif
+
+  // we may also be emitting the call info for the instruction
+  // which we are the delay slot of.
+  CodeEmitInfo* call_info = op->call_info();
+  if (call_info) {
+    add_call_info(code_offset(), call_info);
+  }
+
+  if (VerifyStackAtCalls) {
+    _masm->sub(FP, SP, O7);
+    _masm->cmp(O7, initial_frame_size_in_bytes());
+    _masm->trap(Assembler::notEqual, Assembler::ptr_cc, G0, ST_RESERVED_FOR_USER_0+2 );
+  }
+}
+
+
+// Emit code that stores the negation of 'left' into 'dest'.
+//
+//   left - register operand to negate (single cpu, single fpu, double fpu,
+//          or double cpu, i.e. a long)
+//   dest - register operand receiving the result
+//   tmp  - must be illegal; this implementation needs no scratch register
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
+  assert(left->is_register(), "can only handle registers");
+
+  if (left->is_single_cpu()) {
+    __ neg(left->as_register(), dest->as_register());
+  } else if (left->is_single_fpu()) {
+    __ fneg(FloatRegisterImpl::S, left->as_float_reg(), dest->as_float_reg());
+  } else if (left->is_double_fpu()) {
+    __ fneg(FloatRegisterImpl::D, left->as_double_reg(), dest->as_double_reg());
+  } else {
+    assert (left->is_double_cpu(), "Must be a long");
+    // G0 - lo -> dest lo.  Only the lo register is read and written here.
+    // NOTE(review): presumably a long occupies a single 64-bit register on
+    // this port, which is why the hi register is not touched - confirm.
+    __ sub(G0, left->as_register_lo(), dest->as_register_lo());
+  }
+}
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest,
+                            const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+
+  // if tmp is invalid, then the function being called doesn't destroy the thread
+  if (tmp->is_valid()) {
+    __ save_thread(tmp->as_pointer_register());
+  }
+  __ call(dest, relocInfo::runtime_call_type);
+  __ delayed()->nop();
+  if (info != NULL) {
+    add_call_info_here(info);
+  }
+  if (tmp->is_valid()) {
+    __
restore_thread(tmp->as_pointer_register()); + } + +#ifdef ASSERT + __ verify_thread(); +#endif // ASSERT +} + + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + ShouldNotReachHere(); + + NEEDS_CLEANUP; + if (type == T_LONG) { + LIR_Address* mem_addr = dest->is_address() ? dest->as_address_ptr() : src->as_address_ptr(); + + // (extended to allow indexed as well as constant displaced for JSR-166) + Register idx = noreg; // contains either constant offset or index + + int disp = mem_addr->disp(); + if (mem_addr->index() == LIR_OprFact::illegalOpr) { + if (!Assembler::is_simm13(disp)) { + idx = O7; + __ set(disp, idx); + } + } else { + assert(disp == 0, "not both indexed and disp"); + idx = mem_addr->index()->as_register(); + } + + int null_check_offset = -1; + + Register base = mem_addr->base()->as_register(); + if (src->is_register() && dest->is_address()) { + // G4 is high half, G5 is low half + // clear the top bits of G5, and scale up G4 + __ srl (src->as_register_lo(), 0, G5); + __ sllx(src->as_register_hi(), 32, G4); + // combine the two halves into the 64 bits of G4 + __ or3(G4, G5, G4); + null_check_offset = __ offset(); + if (idx == noreg) { + __ stx(G4, base, disp); + } else { + __ stx(G4, base, idx); + } + } else if (src->is_address() && dest->is_register()) { + null_check_offset = __ offset(); + if (idx == noreg) { + __ ldx(base, disp, G5); + } else { + __ ldx(base, idx, G5); + } + __ srax(G5, 32, dest->as_register_hi()); // fetch the high half into hi + __ mov (G5, dest->as_register_lo()); // copy low half into lo + } else { + Unimplemented(); + } + if (info != NULL) { + add_debug_info_for_null_check(null_check_offset, info); + } + + } else { + // use normal move for all other volatiles since they don't need + // special handling to remain atomic. 
+ move_op(src, dest, type, lir_patch_none, info, false, false, false); + } +} + +void LIR_Assembler::membar() { + // only StoreLoad membars are ever explicitly needed on sparcs in TSO mode + __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) ); +} + +void LIR_Assembler::membar_acquire() { + // no-op on TSO +} + +void LIR_Assembler::membar_release() { + // no-op on TSO +} + +void LIR_Assembler::membar_loadload() { + // no-op + //__ membar(Assembler::Membar_mask_bits(Assembler::loadload)); +} + +void LIR_Assembler::membar_storestore() { + // no-op + //__ membar(Assembler::Membar_mask_bits(Assembler::storestore)); +} + +void LIR_Assembler::membar_loadstore() { + // no-op + //__ membar(Assembler::Membar_mask_bits(Assembler::loadstore)); +} + +void LIR_Assembler::membar_storeload() { + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); +} + +void LIR_Assembler::on_spin_wait() { + Unimplemented(); +} + +// Pack two sequential registers containing 32 bit values +// into a single 64 bit register. +// src and src->successor() are packed into dst +// src and dst may be the same register. +// Note: src is destroyed +void LIR_Assembler::pack64(LIR_Opr src, LIR_Opr dst) { + Register rs = src->as_register(); + Register rd = dst->as_register_lo(); + __ sllx(rs, 32, rs); + __ srl(rs->successor(), 0, rs->successor()); + __ or3(rs, rs->successor(), rd); +} + +// Unpack a 64 bit value in a register into +// two sequential registers. 
+// src is unpacked into dst and dst->successor() +void LIR_Assembler::unpack64(LIR_Opr src, LIR_Opr dst) { + Register rs = src->as_register_lo(); + Register rd = dst->as_register_hi(); + assert_different_registers(rs, rd, rd->successor()); + __ srlx(rs, 32, rd); + __ srl (rs, 0, rd->successor()); +} + +void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + const LIR_Address* addr = addr_opr->as_address_ptr(); + assert(addr->scale() == LIR_Address::times_1, "can't handle complex addresses yet"); + const Register dest_reg = dest->as_pointer_register(); + const Register base_reg = addr->base()->as_pointer_register(); + + if (patch_code != lir_patch_none) { + PatchingStub* patch = new PatchingStub(_masm, PatchingStub::access_field_id); + assert(addr->disp() != 0, "must have"); + assert(base_reg != G3_scratch, "invariant"); + __ patchable_set(0, G3_scratch); + patching_epilog(patch, patch_code, base_reg, info); + assert(dest_reg != G3_scratch, "invariant"); + if (addr->index()->is_valid()) { + const Register index_reg = addr->index()->as_pointer_register(); + assert(index_reg != G3_scratch, "invariant"); + __ add(index_reg, G3_scratch, G3_scratch); + } + __ add(base_reg, G3_scratch, dest_reg); + } else { + if (Assembler::is_simm13(addr->disp())) { + if (addr->index()->is_valid()) { + const Register index_reg = addr->index()->as_pointer_register(); + assert(index_reg != G3_scratch, "invariant"); + __ add(base_reg, addr->disp(), G3_scratch); + __ add(index_reg, G3_scratch, dest_reg); + } else { + __ add(base_reg, addr->disp(), dest_reg); + } + } else { + __ set(addr->disp(), G3_scratch); + if (addr->index()->is_valid()) { + const Register index_reg = addr->index()->as_pointer_register(); + assert(index_reg != G3_scratch, "invariant"); + __ add(index_reg, G3_scratch, G3_scratch); + } + __ add(base_reg, G3_scratch, dest_reg); + } + } +} + + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + 
assert(result_reg->is_register(), "check");
+  __ mov(G2_thread, result_reg->as_register());
+}
+
+#ifdef ASSERT
+// emit run-time assertion
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  assert(op->code() == lir_assert, "must be");
+
+  if (op->in_opr1()->is_valid()) {
+    assert(op->in_opr2()->is_valid(), "both operands must be valid");
+    comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
+  } else {
+    assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
+    assert(op->condition() == lir_cond_always, "no other conditions allowed");
+  }
+
+  Label ok;
+  if (op->condition() != lir_cond_always) {
+    Assembler::Condition acond;
+    switch (op->condition()) {
+      case lir_cond_equal: acond = Assembler::equal; break;
+      case lir_cond_notEqual: acond = Assembler::notEqual; break;
+      case lir_cond_less: acond = Assembler::less; break;
+      case lir_cond_lessEqual: acond = Assembler::lessEqual; break;
+      case lir_cond_greaterEqual: acond = Assembler::greaterEqual; break;
+      case lir_cond_greater: acond = Assembler::greater; break;
+      case lir_cond_aboveEqual: acond = Assembler::greaterEqualUnsigned; break;
+      case lir_cond_belowEqual: acond = Assembler::lessEqualUnsigned; break;
+      default: ShouldNotReachHere();
+    };
+    __ br(acond, false, Assembler::pt, ok);
+    __ delayed()->nop();
+  }
+  if (op->halt()) {
+    const char* str = __ code_string(op->msg());
+    __ stop(str);
+  } else {
+    breakpoint();
+  }
+  __ bind(ok);
+}
+#endif
+
+// Walk the LIR list and give every branch and Java call a delay-slot op.
+//
+// For each branch / call at index i, either the op at i - 1 is swapped behind
+// it and wrapped in a LIR_OpDelay, or a LIR_OpDelay holding a lir_nop is
+// inserted at i + 1.  An op at index 0 has no predecessor, so it always gets
+// the nop.
+void LIR_Assembler::peephole(LIR_List* lir) {
+  LIR_OpList* inst = lir->instructions_list();
+  for (int i = 0; i < inst->length(); i++) {
+    LIR_Op* op = inst->at(i);
+    switch (op->code()) {
+      case lir_cond_float_branch:
+      case lir_branch: {
+        LIR_OpBranch* branch = op->as_OpBranch();
+        assert(branch->info() == NULL, "shouldn't be state on branches anymore");
+        LIR_Op* delay_op = NULL;
+        // we'd like to be able to pull following instructions into
+        // this slot but we don't know enough to do it safely yet so
+        // only optimize block to block control flow.
+        if (branch->block()) {
+          // never index element -1: the first op has no predecessor
+          LIR_Op* prev = (i > 0) ? inst->at(i - 1) : NULL;
+          if (prev && LIR_Assembler::is_single_instruction(prev) && prev->info() == NULL) {
+            // swap previous instruction into delay slot
+            inst->at_put(i - 1, op);
+            inst->at_put(i, new LIR_OpDelay(prev, op->info()));
+#ifndef PRODUCT
+            if (LIRTracePeephole) {
+              tty->print_cr("delayed");
+              inst->at(i - 1)->print();
+              inst->at(i)->print();
+              tty->cr();
+            }
+#endif
+            continue;
+          }
+        }
+
+        if (!delay_op) {
+          delay_op = new LIR_OpDelay(new LIR_Op0(lir_nop), NULL);
+        }
+        // i is not advanced here, so the next iteration visits the inserted
+        // delay op, which matches none of the cases of this switch.
+        inst->insert_before(i + 1, delay_op);
+        break;
+      }
+      case lir_static_call:
+      case lir_icvirtual_call:
+      case lir_optvirtual_call:
+      case lir_dynamic_call: {
+        // never index element -1: the first op has no predecessor
+        LIR_Op* prev = (i > 0) ? inst->at(i - 1) : NULL;
+        if (prev && prev->code() == lir_move && prev->info() == NULL &&
+            (!prev->result_opr()->is_single_cpu() ||
+             prev->result_opr()->as_register() != O0) &&
+            LIR_Assembler::is_single_instruction(prev)) {
+          // Only moves without info can be put into the delay slot.
+          inst->at_put(i - 1, op);
+          inst->at_put(i, new LIR_OpDelay(prev, op->info()));
+#ifndef PRODUCT
+          if (LIRTracePeephole) {
+            tty->print_cr("delayed");
+            inst->at(i - 1)->print();
+            inst->at(i)->print();
+            tty->cr();
+          }
+#endif
+        } else {
+          LIR_Op* delay_op = new LIR_OpDelay(new LIR_Op0(lir_nop), op->as_OpJavaCall()->info());
+          inst->insert_before(i + 1, delay_op);
+          // step over the delay op that was just inserted
+          i++;
+        }
+        break;
+      }
+    }
+  }
+}
+
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
+  LIR_Address* addr = src->as_address_ptr();
+
+  assert(data == dest, "swap uses only 2 operands");
+  assert (code == lir_xchg, "no xadd on sparc");
+
+  if (data->type() == T_INT) {
+    __ swap(as_Address(addr), data->as_register());
+  } else if (data->is_oop()) {
+    Register obj = data->as_register();
+    Register narrow = tmp->as_register();
+    assert(UseCompressedOops, "swap is 32bit only");
+    __ encode_heap_oop(obj, narrow);
+    __ swap(as_Address(addr), narrow);
+    __ decode_heap_oop(narrow, obj);
+ } else { + ShouldNotReachHere(); + } +} + +#undef __ diff -ur --new-file a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.hpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.hpp --- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.hpp 2023-04-16 11:42:11.058348699 +0000 @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_C1_LIRASSEMBLER_SPARC_HPP +#define CPU_SPARC_C1_LIRASSEMBLER_SPARC_HPP + + private: + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // + // Sparc load/store emission + // + // The sparc ld/st instructions cannot accommodate displacements > 13 bits long. 
+ // The following "pseudo" sparc instructions (load/store) make it easier to use the indexed addressing mode + // by allowing 32 bit displacements: + // + // When disp <= 13 bits long, a single load or store instruction is emitted with (disp + [d]). + // When disp > 13 bits long, code is emitted to set the displacement into the O7 register, + // and then a load or store is emitted with ([O7] + [d]). + // + + int store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned); + int store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide); + + int load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned); + int load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide); + + void monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register hdr, int monitor_no); + + int shift_amount(BasicType t); + + static bool is_single_instruction(LIR_Op* op); + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, int mdo_offset_bias, + ciMethodData *md, ciProfileData *data, + Register recv, Register tmp1, Label* update_done); + // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot. 
+ void setup_md_access(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias); + + enum { + _call_stub_size = 68, + _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(128), + _deopt_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(64) + }; + + public: + void pack64(LIR_Opr src, LIR_Opr dst); + void unpack64(LIR_Opr src, LIR_Opr dst); + +#endif // CPU_SPARC_C1_LIRASSEMBLER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp 2023-04-16 11:42:11.058793430 +0000 @@ -0,0 +1,1267 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/powerOfTwo.hpp" +#include "vmreg_sparc.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +void LIRItem::load_byte_item() { + // byte loads use same registers as other loads + load_item(); +} + + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (_gen->can_inline_as_constant(value())) { + if (!r->is_constant()) { + r = LIR_OprFact::value_type(value()->type()); + } + _result = r; + } else { + load_item(); + } +} + + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::Oexception_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::Oissuing_pc_opr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } +LIR_Opr LIRGenerator::syncTempOpr() { return new_register(T_OBJECT); } +LIR_Opr LIRGenerator::getThreadTemp() { return rlock_callee_saved(T_LONG); } + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = callee ? FrameMap::I0_opr : FrameMap::O0_opr; break; + case objectTag: opr = callee ? FrameMap::I0_oop_opr : FrameMap::O0_oop_opr; break; + case longTag: opr = callee ? 
FrameMap::in_long_opr : FrameMap::out_long_opr; break;
+    case floatTag: opr = FrameMap::F0_opr; break;
+    case doubleTag: opr = FrameMap::F0_double_opr; break;
+
+    case addressTag:
+    default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
+  }
+
+  assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return opr;
+}
+
+LIR_Opr LIRGenerator::rlock_callee_saved(BasicType type) {
+  LIR_Opr reg = new_register(type);
+  set_vreg_flag(reg, callee_saved);
+  return reg;
+}
+
+
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  return new_register(T_INT);
+}
+
+
+
+
+
+//--------- loading items into registers --------------------------------
+
+// SPARC cannot inline all constants
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  if (v->type()->as_IntConstant() != NULL) {
+    return v->type()->as_IntConstant()->value() == 0;
+  } else if (v->type()->as_LongConstant() != NULL) {
+    return v->type()->as_LongConstant()->value() == 0L;
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else {
+    return false;
+  }
+}
+
+
+// only simm13 constants can be inlined
+bool LIRGenerator:: can_inline_as_constant(Value i) const {
+  if (i->type()->as_IntConstant() != NULL) {
+    return Assembler::is_simm13(i->type()->as_IntConstant()->value());
+  } else {
+    return can_store_as_constant(i, as_BasicType(i->type()));
+  }
+}
+
+
+bool LIRGenerator:: can_inline_as_constant(LIR_Const* c) const {
+  if (c->type() == T_INT) {
+    return Assembler::is_simm13(c->as_jint());
+  }
+  return false;
+}
+
+
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return new_register(T_INT);
+}
+
+
+
+// Build a LIR_Address for  base + (index << shift) + disp.
+//
+//   base  - register operand holding the base address
+//   index - constant, register, or illegal operand
+//   shift - left shift applied to index
+//   disp  - fixed displacement
+//   type  - type recorded in the returned address
+//
+// The result is always either base + index-register or base + displacement:
+//   * a constant index is folded into the displacement;
+//   * a register index is shifted into a new register and any displacement
+//     is added into it;
+//   * with no index, a displacement that is not a simm13 is moved into a
+//     new register that then serves as the index.
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+  intx large_disp = disp;
+
+  // accumulate fixed displacements
+  if (index->is_constant()) {
+    large_disp += (intx)(index->as_constant_ptr()->as_jint()) << shift;
+    index = LIR_OprFact::illegalOpr;
+  }
+
+  if (index->is_register()) {
+    // apply the shift and accumulate the displacement
+    if (shift > 0) {
+      LIR_Opr tmp = new_pointer_register();
+      __ shift_left(index, shift, tmp);
+      index = tmp;
+    }
+    if (large_disp != 0) {
+      LIR_Opr tmp = new_pointer_register();
+      if (Assembler::is_simm13(large_disp)) {
+        // index + disp -> tmp.  The sum has to start from index: tmp was
+        // allocated just above and holds no value yet.
+        __ add(index, LIR_OprFact::intptrConst(large_disp), tmp);
+        index = tmp;
+      } else {
+        __ move(LIR_OprFact::intptrConst(large_disp), tmp);
+        __ add(tmp, index, tmp);
+        index = tmp;
+      }
+      large_disp = 0;
+    }
+  } else if (large_disp != 0 && !Assembler::is_simm13(large_disp)) {
+    // index is illegal so replace it with the displacement loaded into a register
+    index = new_pointer_register();
+    __ move(LIR_OprFact::intptrConst(large_disp), index);
+    large_disp = 0;
+  }
+
+  // at this point we either have base + index or base + displacement
+  if (large_disp == 0) {
+    return new LIR_Address(base, index, type);
+  } else {
+    assert(Assembler::is_simm13(large_disp), "must be");
+    return new LIR_Address(base, large_disp, type);
+  }
+}
+
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type) {
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+
+  LIR_Opr base_opr;
+  intx offset = arrayOopDesc::base_offset_in_bytes(type);
+
+  if (index_opr->is_constant()) {
+    intx i = index_opr->as_constant_ptr()->as_jint();
+    intx array_offset = i * elem_size;
+    if (Assembler::is_simm13(array_offset + offset)) {
+      base_opr = array_opr;
+      offset = array_offset + offset;
+    } else {
+      base_opr = new_pointer_register();
+      if (Assembler::is_simm13(array_offset)) {
+        __ add(array_opr, LIR_OprFact::intptrConst(array_offset), base_opr);
+      } else {
+        __ move(LIR_OprFact::intptrConst(array_offset), base_opr);
+        __ add(base_opr, array_opr, base_opr);
+      }
+    }
+  } else {
+    if (index_opr->type() == T_INT) {
+      LIR_Opr tmp =
new_register(T_LONG); + __ convert(Bytecodes::_i2l, index_opr, tmp); + index_opr = tmp; + } + + base_opr = new_pointer_register(); + assert (index_opr->is_register(), "Must be register"); + if (shift > 0) { + __ shift_left(index_opr, shift, base_opr); + __ add(base_opr, array_opr, base_opr); + } else { + __ add(index_opr, array_opr, base_opr); + } + } + + return new LIR_Address(base_opr, offset, type); +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r; + if (type == T_LONG) { + r = LIR_OprFact::longConst(x); + } else if (type == T_INT) { + r = LIR_OprFact::intConst(x); + } else { + ShouldNotReachHere(); + } + if (!Assembler::is_simm13(x)) { + LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + LIR_Opr temp = new_register(addr->type()); + __ move(addr, temp); + __ add(temp, load_immediate(step, addr->type()), temp); + __ move(temp, addr); +} + +void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, T_INT), o7opr, info); + __ cmp(condition, o7opr, c); +} + + +void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, type), o7opr, info); + __ cmp(condition, reg, o7opr); +} + + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); + if (is_power_of_2(c + 1)) { + __ shift_left(left, log2i_exact(c 
+ 1), result); + __ sub(result, left, result); + return true; + } else if (is_power_of_2(c - 1)) { + __ shift_left(left, log2i_exact(c - 1), result); + __ add(result, left, result); + return true; + } + return false; +} + + +void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType t = item->type(); + LIR_Opr sp_opr = FrameMap::SP_opr; + if ((t == T_LONG || t == T_DOUBLE) && + ((in_bytes(offset_from_sp) - STACK_BIAS) % 8 != 0)) { + __ unaligned_move(item, new LIR_Address(sp_opr, in_bytes(offset_from_sp), t)); + } else { + __ move(item, new LIR_Address(sp_opr, in_bytes(offset_from_sp), t)); + } +} + +void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ciMethod* profiled_method, int profiled_bci) { + LIR_Opr tmp1 = FrameMap::G1_opr; + LIR_Opr tmp2 = FrameMap::G3_opr; + LIR_Opr tmp3 = FrameMap::G5_opr; + __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + LIR_Opr lock = FrameMap::G1_opr; + LIR_Opr scratch = FrameMap::G3_opr; + LIR_Opr hdr = FrameMap::G4_opr; + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expects object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, hdr, scratch, x->monitor_no(), info_for_exception, info); +} + + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + 
set_no_result(x); + LIR_Opr lock = FrameMap::G1_opr; + LIR_Opr hdr = FrameMap::G3_opr; + LIR_Opr obj_temp = FrameMap::G4_opr; + monitor_exit(obj_temp, lock, hdr, LIR_OprFact::illegalOpr, x->monitor_no()); +} + + +// _ineg, _lneg, _fneg, _dneg +void LIRGenerator::do_NegateOp(NegateOp* x) { + LIRItem value(x->x(), this); + value.load_item(); + LIR_Opr reg = rlock_result(x); + __ negate(value.result(), reg); +} + + + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + switch (x->op()) { + case Bytecodes::_fadd: + case Bytecodes::_fmul: + case Bytecodes::_fsub: + case Bytecodes::_fdiv: + case Bytecodes::_dadd: + case Bytecodes::_dmul: + case Bytecodes::_dsub: + case Bytecodes::_ddiv: { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + right.load_item(); + rlock_result(x); + arithmetic_op_fpu(x->op(), x->operand(), left.result(), right.result()); + } + break; + + case Bytecodes::_frem: + case Bytecodes::_drem: { + address entry; + switch (x->op()) { + case Bytecodes::_frem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + break; + case Bytecodes::_drem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL); + set_result(x, result); + } + break; + + default: ShouldNotReachHere(); + } +} + + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + switch (x->op()) { + case Bytecodes::_lrem: + case Bytecodes::_lmul: + case Bytecodes::_ldiv: { + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + LIRItem right(x->y(), this); + right.load_item(); + + CodeEmitInfo* info = state_for(x); + LIR_Opr item = right.result(); + assert(item->is_register(), "must be"); + __ cmp(lir_cond_equal, item, LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, T_LONG, new 
DivByZeroStub(info)); + } + + address entry; + switch (x->op()) { + case Bytecodes::_lrem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::lrem); + break; // check if dividend is 0 is done elsewhere + case Bytecodes::_ldiv: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv); + break; // check if dividend is 0 is done elsewhere + case Bytecodes::_lmul: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::lmul); + break; + default: + ShouldNotReachHere(); + } + + // order of arguments to runtime call is reversed. + LIR_Opr result = call_runtime(x->y(), x->x(), entry, x->type(), NULL); + set_result(x, result); + break; + } + case Bytecodes::_ladd: + case Bytecodes::_lsub: { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + right.load_item(); + rlock_result(x); + + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + break; + } + default: ShouldNotReachHere(); + } +} + + +// Returns if item is an int constant that can be represented by a simm13 +static bool is_simm13(LIR_Opr item) { + if (item->is_constant() && item->type() == T_INT) { + return Assembler::is_simm13(item->as_constant_ptr()->as_jint()); + } else { + return false; + } +} + + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + bool is_div_rem = x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem; + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + // missing test if instr is commutative and if we should swap + right.load_nonconstant(); + assert(right.is_constant() || right.is_register(), "wrong state of right"); + left.load_item(); + rlock_result(x); + if (is_div_rem) { + CodeEmitInfo* info = state_for(x); + LIR_Opr tmp = FrameMap::G1_opr; + if (x->op() == Bytecodes::_irem) { + __ irem(left.result(), right.result(), x->operand(), tmp, info); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left.result(), right.result(), x->operand(), tmp, info); + } + } else { + 
arithmetic_op_int(x->op(), x->operand(), left.result(), right.result(), FrameMap::G1_opr); + } +} + + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + } + ShouldNotReachHere(); +} + + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + LIRItem value(x->x(), this); + LIRItem count(x->y(), this); + // Long shift destroys count register + if (value.type()->is_long()) { + count.set_destroys_register(); + } + value.load_item(); + // the old backend doesn't support this + if (count.is_constant() && count.type()->as_IntConstant() != NULL && value.type()->is_int()) { + jint c = count.get_jint_constant() & 0x1f; + assert(c >= 0 && c < 32, "should be small"); + count.dont_load_item(); + } else { + count.load_item(); + } + LIR_Opr reg = rlock_result(x); + shift_op(x->op(), reg, value.result(), count.result(), LIR_OprFact::illegalOpr); +} + + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + right.load_nonconstant(); + LIR_Opr reg = rlock_result(x); + + logic_op(x->op(), reg, left.result(), right.result()); +} + + + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), 
right.result(), reg); + } else { + Unimplemented(); + } +} + +LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) { + LIR_Opr result = new_register(T_INT); + LIR_Opr t1 = FrameMap::G1_opr; + LIR_Opr t2 = FrameMap::G3_opr; + cmp_value.load_item(); + new_value.load_item(); + if (is_reference_type(type)) { + __ cas_obj(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), t1, t2); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), t1, t2); + } else if (type == T_LONG) { + __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), t1, t2); + } else { + Unimplemented(); + } + __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), + result, type); + return result; +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { + bool is_obj = is_reference_type(type); + LIR_Opr result = new_register(type); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + + value.load_item(); + + if (is_obj) { + tmp = FrameMap::G3_opr; + } + + // Because we want a 2-arg form of xchg + __ move(value.result(), result); + __ xchg(addr, result, result, tmp); + return result; +} + +LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { + Unimplemented(); + return LIR_OprFact::illegalOpr; +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + switch (x->id()) { + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + } + break; + } + case vmIntrinsics::_dlog10: // fall through + case 
vmIntrinsics::_dlog: // fall through + case vmIntrinsics::_dsin: // fall through + case vmIntrinsics::_dtan: // fall through + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dexp: { + assert(x->number_of_arguments() == 1, "wrong type"); + + address runtime_entry = NULL; + switch (x->id()) { + case vmIntrinsics::_dsin: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case vmIntrinsics::_dcos: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case vmIntrinsics::_dtan: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case vmIntrinsics::_dlog: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case vmIntrinsics::_dlog10: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case vmIntrinsics::_dexp: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + default: + ShouldNotReachHere(); + } + + LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + case vmIntrinsics::_dpow: { + assert(x->number_of_arguments() == 2, "wrong type"); + address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + } +} + + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + // Note: spill caller save before setting the item + LIRItem src (x->argument_at(0), this); + LIRItem src_pos (x->argument_at(1), this); + LIRItem dst (x->argument_at(2), this); + LIRItem dst_pos (x->argument_at(3), this); + LIRItem length (x->argument_at(4), this); + // load all values in callee_save_registers, as this makes the + // parameter passing to the fast case 
simpler + src.load_item_force (rlock_callee_saved(T_OBJECT)); + src_pos.load_item_force (rlock_callee_saved(T_INT)); + dst.load_item_force (rlock_callee_saved(T_OBJECT)); + dst_pos.load_item_force (rlock_callee_saved(T_INT)); + length.load_item_force (rlock_callee_saved(T_INT)); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), + length.result(), rlock_callee_saved(T_INT), + expected_type, flags, info); + set_no_result(x); +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + if (index->is_constant()) { + offset += index->as_constant_ptr()->as_jint(); + index = LIR_OprFact::illegalOpr; + } else if (index->is_register()) { + LIR_Opr tmp2 = new_register(T_LONG); + LIR_Opr tmp3 = new_register(T_LONG); + __ move(base_op, tmp2); + __ move(index, tmp3); + __ add(tmp2, tmp3, tmp2); + base_op = tmp2; + } else { + ShouldNotReachHere(); + } + } + + LIR_Address* a = new LIR_Address(base_op, offset, T_BYTE); + + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + len.load_item_force(cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: { + + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); + int array_offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem end(x->argument_at(3), this); + + buf.load_item(); + off.load_nonconstant(); + end.load_nonconstant(); + + // len = end - off + LIR_Opr len = end.result(); + LIR_Opr tmpA = new_register(T_INT); + LIR_Opr tmpB = new_register(T_INT); + __ move(end.result(), tmpA); + __ move(off.result(), tmpB); + __ sub(tmpA, tmpB, tmpA); + len = tmpA; + + LIR_Opr index = off.result(); + + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + array_offset += off.result()->as_jint(); + } + + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + if (index->is_constant()) { + array_offset += index->as_constant_ptr()->as_jint(); + index = LIR_OprFact::illegalOpr; + } else if (index->is_register()) { + LIR_Opr tmp2 = new_register(T_LONG); + LIR_Opr tmp3 = new_register(T_LONG); + __ move(base_op, tmp2); + __ move(index, tmp3); + __ add(tmp2, tmp3, tmp2); + base_op = tmp2; + } else { + ShouldNotReachHere(); + } + } + + LIR_Address* a = new LIR_Address(base_op, array_offset, T_BYTE); + + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + __ move(len, cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 3, "wrong 
type"); + assert(UseFMA, "Needs FMA instructions support."); + + LIRItem a(x->argument_at(0), this); + LIRItem b(x->argument_at(1), this); + LIRItem c(x->argument_at(2), this); + + a.load_item(); + b.load_item(); + c.load_item(); + + LIR_Opr ina = a.result(); + LIR_Opr inb = b.result(); + LIR_Opr inc = c.result(); + LIR_Opr res = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_fmaF: __ fmaf(ina, inb, inc, res); break; + case vmIntrinsics::_fmaD: __ fmad(ina, inb, inc, res); break; + default: + ShouldNotReachHere(); + break; + } +} + +void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { + fatal("vectorizedMismatch intrinsic is not implemented on this platform"); +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + + switch (x->op()) { + case Bytecodes::_f2l: + case Bytecodes::_d2l: + case Bytecodes::_d2i: + case Bytecodes::_l2f: + case Bytecodes::_l2d: { + + address entry; + switch (x->op()) { + case Bytecodes::_l2f: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f); + break; + case Bytecodes::_l2d: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2d); + break; + case Bytecodes::_f2l: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::f2l); + break; + case Bytecodes::_d2l: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2l); + break; + case Bytecodes::_d2i: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2i); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL); + set_result(x, result); + break; + } + + case Bytecodes::_i2f: + case Bytecodes::_i2d: { + LIRItem value(x->value(), this); + + LIR_Opr reg = rlock_result(x); + // To convert an int to double, we need to load the 32-bit int + // from memory into a single precision floating point register + // (even numbered). Then the sparc fitod instruction takes care + // of the conversion. 
This is a bit ugly, but is the best way to + // get the int value in a single precision floating point register + value.load_item(); + LIR_Opr tmp = force_to_spill(value.result(), T_FLOAT); + __ convert(x->op(), tmp, reg); + break; + } + break; + + case Bytecodes::_i2l: + case Bytecodes::_i2b: + case Bytecodes::_i2c: + case Bytecodes::_i2s: + case Bytecodes::_l2i: + case Bytecodes::_f2d: + case Bytecodes::_d2f: { // inline code + LIRItem value(x->value(), this); + + value.load_item(); + LIR_Opr reg = rlock_result(x); + // dropped final false argument + __ convert(x->op(), value.result(), reg); + } + break; + + case Bytecodes::_f2i: { + LIRItem value (x->value(), this); + value.set_destroys_register(); + value.load_item(); + LIR_Opr reg = rlock_result(x); + set_vreg_flag(reg, must_start_in_memory); + // dropped final false argument + __ convert(x->op(), value.result(), reg); + } + break; + + default: ShouldNotReachHere(); + } +} + + +void LIRGenerator::do_NewInstance(NewInstance* x) { + print_if_not_loaded(x); + + // This instruction can be deoptimized in the slow path : use + // O0 as result register. 
+ const LIR_Opr reg = result_register_for(x->type()); + + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr tmp1 = FrameMap::G1_oop_opr; + LIR_Opr tmp2 = FrameMap::G3_oop_opr; + LIR_Opr tmp3 = FrameMap::G4_oop_opr; + LIR_Opr tmp4 = FrameMap::O1_oop_opr; + LIR_Opr klass_reg = FrameMap::G5_metadata_opr; + new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + // Evaluate state_for early since it may emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item(); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::G1_oop_opr; + LIR_Opr tmp2 = FrameMap::G3_oop_opr; + LIR_Opr tmp3 = FrameMap::G4_oop_opr; + LIR_Opr tmp4 = FrameMap::O1_oop_opr; + LIR_Opr klass_reg = FrameMap::G5_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + // Evaluate state_for early since it may emit code. 
+ CodeEmitInfo* info = state_for(x, x->state()); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + LIRItem length(x->length(), this); + length.load_item(); + + const LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::G1_oop_opr; + LIR_Opr tmp2 = FrameMap::G3_oop_opr; + LIR_Opr tmp3 = FrameMap::G4_oop_opr; + LIR_Opr tmp4 = FrameMap::O1_oop_opr; + LIR_Opr klass_reg = FrameMap::G5_metadata_opr; + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciMetadata* obj = ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). 
+ x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + store_stack_parameter (size->result(), + in_ByteSize(STACK_BIAS + + frame::memory_parameter_word_sp_offset * wordSize + + i * sizeof(jint))); + } + + // This instruction can be deoptimized in the slow path : use + // O0 as result register. + const LIR_Opr klass_reg = FrameMap::O0_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + LIR_Opr rank = FrameMap::O1_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::as_pointer_opr(O2); + int offset_from_sp = (frame::memory_parameter_word_sp_offset * wordSize) + STACK_BIAS; + __ add(FrameMap::SP_opr, + LIR_OprFact::intptrConst(offset_from_sp), + varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + const LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { +} + + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (so x->obj()->item() is valid for creating a debug info location) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr out_reg = rlock_result(x); + CodeStub* stub; + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? 
state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/)); + + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception, + Deoptimization::Reason_class_check, + Deoptimization::Action_none); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); + } + LIR_Opr tmp1 = FrameMap::G1_oop_opr; + LIR_Opr tmp2 = FrameMap::G3_oop_opr; + LIR_Opr tmp3 = FrameMap::G4_oop_opr; + __ checkcast(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + // ensure the result register is not the input register because the result is initialized before the patching safepoint + obj.load_item(); + LIR_Opr out_reg = rlock_result(x); + LIR_Opr tmp1 = FrameMap::G1_oop_opr; + LIR_Opr tmp2 = FrameMap::G3_oop_opr; + LIR_Opr tmp3 = FrameMap::G4_oop_opr; + __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, + x->direct_compare(), patching_info, + x->profiled_method(), x->profiled_bci()); +} + + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + If::Condition cond = x->cond(); + + if (tag == longTag) { + // for longs, only conditions "eql", "neq", "lss", "geq" are 
valid; + // mirror for other conditions + if (cond == If::gtr || cond == If::leq) { + // swap inputs + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + + LIR_Opr left = LIR_OprFact::illegalOpr; + LIR_Opr right = LIR_OprFact::illegalOpr; + + xin->load_item(); + left = xin->result(); + + if (is_simm13(yin->result())) { + // inline int constants which are small enough to be immediate operands + right = LIR_OprFact::value_type(yin->value()->type()); + } else if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && + (cond == If::eql || cond == If::neq)) { + // inline long zero + right = LIR_OprFact::value_type(yin->value()->type()); + } else if (tag == objectTag && yin->is_constant() && (yin->get_jobject_constant()->is_null_object())) { + right = LIR_OprFact::value_type(yin->value()->type()); + } else { + yin->load_item(); + right = yin->result(); + } + set_no_result(x); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), + x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); + __ safepoint(safepoint_poll_register(), state_for(x, x->state_before())); + } + + __ cmp(lir_cond(cond), left, right); + // Generate branch profiling. Profiling code doesn't kill flags. 
+ profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { + __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(G2); +} + + +void LIRGenerator::trace_block_entry(BlockBegin* block) { + __ move(LIR_OprFact::intConst(block->block_id()), FrameMap::O0_opr); + LIR_OprList* args = new LIR_OprList(1); + args->append(FrameMap::O0_opr); + address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry); + __ call_runtime_leaf(func, rlock_callee_saved(T_INT), LIR_OprFact::illegalOpr, args); +} + + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ store(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + __ load(address, result, info); +} diff -ur --new-file a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp 2023-04-16 11:42:11.058912603 +0000 @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" + +FloatRegister LIR_OprDesc::as_float_reg() const { + return FrameMap::nr2floatreg(fpu_regnr()); +} + +FloatRegister LIR_OprDesc::as_double_reg() const { + return FrameMap::nr2floatreg(fpu_regnrHi()); +} + +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) != fnoreg, "Sparc holds double in two regs."); + return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | + (reg2 << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); +} + +#ifndef PRODUCT +void LIR_Address::verify() const { + assert(scale() == times_1, "Scaled addressing mode not available on SPARC and should not be used"); + assert(disp() == 0 || index()->is_illegal(), "can't have both"); + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); + assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, + "wrong type for addresses"); +} +#endif // PRODUCT diff -ur --new-file a/src/hotspot/cpu/sparc/c1_LinearScan_sparc.cpp b/src/hotspot/cpu/sparc/c1_LinearScan_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_LinearScan_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_LinearScan_sparc.cpp 
// SPARC definition of LinearScan::allocate_fpu_stack().
// The body is intentionally empty: there is no FPU register stack to
// allocate on this platform.
void LinearScan::allocate_fpu_stack() {
  // No FPU stack on SPARC
}
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_C1_LINEARSCAN_SPARC_HPP +#define CPU_SPARC_C1_LINEARSCAN_SPARC_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num < 26 || reg_num > 31; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + // Sparc requires two cpu registers for long + // and two cpu registers for double + if (type == T_DOUBLE) { + return 2; + } + return 1; +} + + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return type == T_DOUBLE; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + return assigned_reg > pd_last_callee_saved_reg && assigned_reg <= pd_last_fpu_reg; +} + + +inline void LinearScan::pd_add_temps(LIR_Op* op) { + // No special case behaviours yet +} + + +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { + if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { + assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + 
return true; + } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; + } + return false; +} + +#endif // CPU_SPARC_C1_LINEARSCAN_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.cpp 2023-04-16 11:42:11.059395735 +0000 @@ -0,0 +1,404 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "gc/shared/tlab_globals.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { + Label L; + const Register temp_reg = G3_scratch; + // Note: needs more testing of out-of-line vs. inline slow case + verify_oop(receiver); + load_klass(receiver, temp_reg); + cmp_and_brx_short(temp_reg, iCache, Assembler::equal, Assembler::pt, L); + AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); + jump_to(ic_miss, temp_reg); + delayed()->nop(); + align(CodeEntryAlignment); + bind(L); +} + + +void C1_MacroAssembler::explicit_null_check(Register base) { + Unimplemented(); +} + + +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); + generate_stack_overflow_check(bang_size_in_bytes); + // Create the frame. + save_frame_c1(frame_size_in_bytes); +} + + +void C1_MacroAssembler::verified_entry(bool breakAtEntry) { + if (breakAtEntry) { + breakpoint_trap(); + } +} + + +void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox, Register Rscratch, Label& slow_case) { + assert_different_registers(Rmark, Roop, Rbox, Rscratch); + + Label done; + + Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); + + // The following move must be the first instruction of emitted since debug + // information may be generated for it. 
+ // Load object header + ld_ptr(mark_addr, Rmark); + + verify_oop(Roop); + + // save object being locked into the BasicObjectLock + st_ptr(Roop, Rbox, BasicObjectLock::obj_offset_in_bytes()); + + if (UseBiasedLocking) { + biased_locking_enter(Roop, Rmark, Rscratch, done, &slow_case); + } + + // Save Rbox in Rscratch to be used for the cas operation + mov(Rbox, Rscratch); + + // and mark it unlocked + or3(Rmark, markWord::unlocked_value, Rmark); + + // save unlocked object header into the displaced header location on the stack + st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); + + // compare object markWord with Rmark and if equal exchange Rscratch with object markWord + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + cas_ptr(mark_addr.base(), Rmark, Rscratch); + // if compare/exchange succeeded we found an unlocked object and we now have locked it + // hence we are done + cmp(Rmark, Rscratch); + brx(Assembler::equal, false, Assembler::pt, done); + delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot + // we did not find an unlocked object so see if this is a recursive case + // sub(Rscratch, SP, Rscratch); + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + andcc(Rscratch, 0xfffff003, Rscratch); + brx(Assembler::notZero, false, Assembler::pn, slow_case); + delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + bind(done); +} + + +void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case) { + assert_different_registers(Rmark, Roop, Rbox); + + Label done; + + Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + + if (UseBiasedLocking) { + // load the object out of the BasicObjectLock + ld_ptr(Rbox, BasicObjectLock::obj_offset_in_bytes(), Roop); + verify_oop(Roop); + biased_locking_exit(mark_addr, Rmark, done); + } + // 
Test first if it is a fast recursive unlock + ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); + br_null_short(Rmark, Assembler::pt, done); + if (!UseBiasedLocking) { + // load object + ld_ptr(Rbox, BasicObjectLock::obj_offset_in_bytes(), Roop); + verify_oop(Roop); + } + + // Check if it is still a lightweight lock; this is true if we see + // the stack address of the basicLock in the markWord of the object + cas_ptr(mark_addr.base(), Rbox, Rmark); + cmp(Rbox, Rmark); + + brx(Assembler::notEqual, false, Assembler::pn, slow_case); + delayed()->nop(); + // Done + bind(done); +} + + +void C1_MacroAssembler::try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register, must be global register for incr_allocated_bytes + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails +) { + RegisterOrConstant size_in_bytes = var_size_in_bytes->is_valid() + ? 
RegisterOrConstant(var_size_in_bytes) : RegisterOrConstant(con_size_in_bytes); + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); + incr_allocated_bytes(size_in_bytes, t1, t2); + } +} + + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { + assert_different_registers(obj, klass, len, t1, t2); + if (UseBiasedLocking && !len->is_valid()) { + ld_ptr(klass, in_bytes(Klass::prototype_header_offset()), t1); + } else { + set((intx)markWord::prototype().value(), t1); + } + st_ptr(t1, obj, oopDesc::mark_offset_in_bytes()); + if (UseCompressedClassPointers) { + // Encode a copy of klass in t1 so that the klass register itself is left unchanged + mov(klass, t1); + encode_klass_not_null(t1); + stw(t1, obj, oopDesc::klass_offset_in_bytes()); + } else { + st_ptr(klass, obj, oopDesc::klass_offset_in_bytes()); + } + if (len->is_valid()) { + st(len, obj, arrayOopDesc::length_offset_in_bytes()); + } else if (UseCompressedClassPointers) { + // otherwise length is in the class gap + store_klass_gap(G0, obj); + } +} + + +void C1_MacroAssembler::initialize_body(Register base, Register index) { + zero_memory(base, index); +} + + +void C1_MacroAssembler::allocate_object( + Register obj, // result: pointer to object after successful allocation + Register t1, // temp register + Register t2, // temp register, must be a global register for try_allocate + Register t3, // temp register + int hdr_size, // object header size in words + int obj_size, // object size in words + Register klass, // object klass + Label& slow_case // continuation point if fast allocation fails +) { + assert_different_registers(obj, t1, t2, t3, klass); + assert(klass == G5, "must be G5"); + + // allocate space & initialize header + if (!is_simm13(obj_size * wordSize)) { + // would need to use extra register to load + // object size => go to the slow case for now + ba(slow_case); + delayed()->nop(); + 
return; + } + try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case); + + initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2, /* is_tlab_allocated */ UseTLAB); +} + +void C1_MacroAssembler::initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ) { + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, t1, t2); + +#ifdef ASSERT + { + Label ok; + ld(klass, in_bytes(Klass::layout_helper_offset()), t1); + if (var_size_in_bytes != noreg) { + cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok); + } else { + cmp_and_brx_short(t1, con_size_in_bytes, Assembler::equal, Assembler::pt, ok); + } + stop("bad size in initialize_object"); + should_not_reach_here(); + + bind(ok); + } + +#endif + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // initialize body + const int threshold = 5 * HeapWordSize; // approximate break even point for code size + if (var_size_in_bytes != noreg) { + // use a loop + add(obj, hdr_size_in_bytes, t1); // compute address of first element + sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body + initialize_body(t1, t2); + } else if (con_size_in_bytes <= threshold) { + // use explicit NULL stores + for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + // use a loop + const Register base = t1; + const Register index = t2; + add(obj, hdr_size_in_bytes, base); // compute address of first element + // compute 
index = number of bytes to clear + set(con_size_in_bytes - hdr_size_in_bytes, index); + initialize_body(base, index); + } + } + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == O0, "must be"); + call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)), + relocInfo::runtime_call_type); + delayed()->nop(); + } + + verify_oop(obj); +} + + +void C1_MacroAssembler::allocate_array( + Register obj, // result: pointer to array after successful allocation + Register len, // array length + Register t1, // temp register + Register t2, // temp register + Register t3, // temp register + int hdr_size, // object header size in words + int elt_size, // element size in bytes + Register klass, // object klass + Label& slow_case // continuation point if fast allocation fails +) { + assert_different_registers(obj, len, t1, t2, t3, klass); + assert(klass == G5, "must be G5"); + assert(t1 == G1, "must be G1"); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); + + // check for negative or excessive length + // note: the maximum length allowed is chosen so that arrays of any + // element size with this length are always smaller or equal + // to the largest integer (i.e., array size computation will + // not overflow) + set(max_array_allocation_length, t1); + cmp(len, t1); + br(Assembler::greaterUnsigned, false, Assembler::pn, slow_case); + + // compute array size (the first instruction below fills the delay slot of the branch above) + // note: if 0 <= len <= max_length, len*elt_size + header + alignment is + // smaller or equal to the largest integer; also, since top is always + // aligned, we can do the alignment here instead of at the end address + // computation + const Register arr_size = t1; + switch (elt_size) { + case 1: delayed()->mov(len, arr_size); break; + case 2: delayed()->sll(len, 1, arr_size); break; + case 4: delayed()->sll(len, 2, arr_size); break; + case 8: delayed()->sll(len, 3, arr_size); break; + default: ShouldNotReachHere(); + } + 
add(arr_size, hdr_size * wordSize + MinObjAlignmentInBytesMask, arr_size); // add space for header & alignment + and3(arr_size, ~MinObjAlignmentInBytesMask, arr_size); // align array size + + // allocate space & initialize header + if (UseTLAB) { + tlab_allocate(obj, arr_size, 0, t2, slow_case); + } else { + eden_allocate(obj, arr_size, 0, t2, t3, slow_case); + } + initialize_header(obj, klass, len, t2, t3); + + // initialize body + const Register base = t2; + const Register index = t3; + add(obj, hdr_size * wordSize, base); // compute address of first element + sub(arr_size, hdr_size * wordSize, index); // compute index = number of bytes to clear + initialize_body(base, index); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == O0, "must be"); + call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)), + relocInfo::runtime_call_type); + delayed()->nop(); + } + + verify_oop(obj); +} + + +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(SP, stack_offset + STACK_BIAS)); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + Label not_null; + br_notnull_short(r, Assembler::pt, not_null); + stop("non-null oop required"); + bind(not_null); + if (!VerifyOops) return; + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool iregisters, bool lregisters, bool oregisters, + Register preserve1, Register preserve2) { + if (iregisters) { + for (int i = 0; i < 6; i++) { + Register r = as_iRegister(i); + if (r != preserve1 && r != preserve2) set(0xdead, r); + } + } + if (oregisters) { + for (int i = 0; i < 6; i++) { + Register r = as_oRegister(i); + if (r != preserve1 && r != preserve2) set(0xdead, r); + } + } + if (lregisters) { + for (int i = 0; i < 8; i++) { + Register r = as_lRegister(i); + if (r != preserve1 && r != preserve2) set(0xdead, r); + } + } +} + + +#endif diff -ur --new-file 
a/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.hpp b/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.hpp --- a/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_MacroAssembler_sparc.hpp 2023-04-16 11:42:11.059530379 +0000 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SPARC_C1_MACROASSEMBLER_SPARC_HPP +#define CPU_SPARC_C1_MACROASSEMBLER_SPARC_HPP + + void pd_init() { /* nothing to do */ } + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); + void initialize_body(Register base, Register index); + + // locking/unlocking + void lock_object (Register Rmark, Register Roop, Register Rbox, Register Rscratch, Label& slow_case); + void unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + void allocate_object( + Register obj, // result: pointer to object after successful allocation + Register t1, // temp register + Register t2, // temp register + Register t3, // temp register + int hdr_size, // object header size in words + int obj_size, // object size in words + Register klass, // object klass + Label& slow_case // continuation point if fast allocation fails + ); + + enum { + 
max_array_allocation_length = 0x01000000 // sparc friendly value, requires sethi only + }; + + // allocation of arrays + void allocate_array( + Register obj, // result: pointer to array after successful allocation + Register len, // array length + Register t1, // temp register + Register t2, // temp register + Register t3, // temp register + int hdr_size, // object header size in words + int elt_size, // element size in bytes + Register klass, // object klass + Label& slow_case // continuation point if fast allocation fails + ); + + // invalidates registers in this window + void invalidate_registers(bool iregisters, bool lregisters, bool oregisters, + Register preserve1 = noreg, Register preserve2 = noreg); + + // This platform only uses signal-based null checks. The Label is not needed. + void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + + void save_live_registers_no_oop_map(bool save_fpu_registers); + void restore_live_registers(bool restore_fpu_registers); + +#endif // CPU_SPARC_C1_MACROASSEMBLER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp b/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp --- a/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c1_Runtime1_sparc.cpp 2023-04-16 11:42:11.059886353 +0000 @@ -0,0 +1,919 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "ci/ciUtilities.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_sparc.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/macros.hpp" +#include "utilities/align.hpp" +#include "vmreg_sparc.inline.hpp" + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry_point, int number_of_arguments) { + // for sparc changing the number of arguments doesn't change + // anything about the frame size so we'll always lie and claim that + // we are only passing 1 argument. 
+ set_num_rt_args(1); + + assert_not_delayed(); + // bang stack before going to runtime + set(-os::vm_page_size() + STACK_BIAS, G3_scratch); + st(G0, SP, G3_scratch); + + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + + set_last_Java_frame(SP, noreg); + if (VerifyThread) mov(G2_thread, O0); // about to be smashed; pass early + save_thread(L7_thread_cache); + // do the call + call(entry_point, relocInfo::runtime_call_type); + if (!VerifyThread) { + delayed()->mov(G2_thread, O0); // pass thread as first argument + } else { + delayed()->nop(); // (thread already passed) + } + int call_offset = offset(); // offset of return address + restore_thread(L7_thread_cache); + reset_last_Java_frame(); + + // check for pending exceptions + { Label L; + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + ld_ptr(exception_addr, Gtemp); + br_null_short(Gtemp, pt, L); + Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); + st_ptr(G0, vm_result_addr); + Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset()); + st_ptr(G0, vm_result_addr_2); + + if (frame_size() == no_frame_size) { + // we use O7 linkage so that forward_exception_entry has the issuing PC + call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + delayed()->restore(); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + AddressLiteral exc(Runtime1::entry_for(Runtime1::forward_exception_id)); + jump_to(exc, G4); + delayed()->nop(); + } + bind(L); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result1->is_valid()) { // get oop result if there is one and reset it in the thread + get_vm_result (oop_result1); + } else { + // be a little paranoid and clear the result + Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); + st_ptr(G0, vm_result_addr); + } + + // get second result if there is one and 
reset the value in the thread + if (metadata_result->is_valid()) { + get_vm_result_2 (metadata_result); + } else { + // be a little paranoid and clear the result + Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset()); + st_ptr(G0, vm_result_addr_2); + } + + return call_offset; +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { + // O0 is reserved for the thread + mov(arg1, O1); + return call_RT(oop_result1, metadata_result, entry, 1); +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { + // O0 is reserved for the thread + mov(arg1, O1); + mov(arg2, O2); assert(arg2 != O1, "smashed argument"); + return call_RT(oop_result1, metadata_result, entry, 2); +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + // O0 is reserved for the thread + mov(arg1, O1); + mov(arg2, O2); assert(arg2 != O1, "smashed argument"); + mov(arg3, O3); assert(arg3 != O1 && arg3 != O2, "smashed argument"); + return call_RT(oop_result1, metadata_result, entry, 3); +} + +void StubAssembler::prologue(const char* name, bool must_gc_arguments) { + set_info(name, must_gc_arguments); +} + +void StubAssembler::epilogue() { + delayed()->restore(); +} + +// Implementation of Runtime1 + + +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; +static int reg_save_size_in_words; +static int frame_size_in_bytes = -1; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + assert(frame_size_in_bytes == sasm->total_frame_size_in_bytes(reg_save_size_in_words), + "mismatch in calculation"); + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + int 
i; + for (i = 0; i < FrameMap::nof_cpu_regs; i++) { + Register r = as_Register(i); + if (r == G1 || r == G3 || r == G4 || r == G5) { + int sp_offset = cpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } + } + + if (save_fpu_registers) { + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = fpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } + } + return oop_map; +} + +#define __ this-> + +void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers) { + assert(frame_size_in_bytes == __ total_frame_size_in_bytes(reg_save_size_in_words), + "mismatch in calculation"); + __ save_frame_c1(frame_size_in_bytes); + + // Record volatile registers as callee-save values in an OopMap so their save locations will be + // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for + // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers + // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame + // (as the stub's I's) when the runtime routine called by the stub creates its frame. 
+ // OopMap frame sizes are in c2 stack slot sizes (sizeof(jint)) + + int i; + for (i = 0; i < FrameMap::nof_cpu_regs; i++) { + Register r = as_Register(i); + if (r == G1 || r == G3 || r == G4 || r == G5) { + int sp_offset = cpu_reg_save_offsets[i]; + __ st_ptr(r, SP, (sp_offset * BytesPerWord) + STACK_BIAS); + } + } + + if (save_fpu_registers) { + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = fpu_reg_save_offsets[i]; + __ stf(FloatRegisterImpl::S, r, SP, (sp_offset * BytesPerWord) + STACK_BIAS); + } + } +} + +void C1_MacroAssembler::restore_live_registers(bool restore_fpu_registers) { + for (int i = 0; i < FrameMap::nof_cpu_regs; i++) { + Register r = as_Register(i); + if (r == G1 || r == G3 || r == G4 || r == G5) { + __ ld_ptr(SP, (cpu_reg_save_offsets[i] * BytesPerWord) + STACK_BIAS, r); + } + } + + if (restore_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + __ ldf(FloatRegisterImpl::S, SP, (fpu_reg_save_offsets[i] * BytesPerWord) + STACK_BIAS, r); + } + } +} + +#undef __ +#define __ sasm-> + +static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true) { + __ save_live_registers_no_oop_map(save_fpu_registers); + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + __ restore_live_registers(restore_fpu_registers); +} + + +void Runtime1::initialize_pd() { + // compute word offsets from SP at which live (non-windowed) registers are captured by stub routines + // + // A stub routine will have a frame that is at least large enough to hold + // a register window save area (obviously) and the volatile g registers + // and floating registers. A user of save_live_registers can have a frame + // that has more scratch area in it (although typically they will use L-regs). 
+ // in that case the frame will look like this (stack growing down) + // + // FP -> | | + // | scratch mem | + // | " " | + // -------------- + // | float regs | + // | " " | + // --------------- + // | G regs | + // | " " | + // --------------- + // | abi reg. | + // | window save | + // | area | + // SP -> --------------- + // + int i; + int sp_offset = align_up((int)frame::register_save_words, 2); // start doubleword aligned + + // only G int registers are saved explicitly; others are found in register windows + for (i = 0; i < FrameMap::nof_cpu_regs; i++) { + Register r = as_Register(i); + if (r == G1 || r == G3 || r == G4 || r == G5) { + cpu_reg_save_offsets[i] = sp_offset; + sp_offset++; + } + } + + // all float registers are saved explicitly + assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + fpu_reg_save_offsets[i] = sp_offset; + sp_offset++; + } + reg_save_size_in_words = sp_offset - frame::memory_parameter_word_sp_offset; + // this should match assembler::total_frame_size_in_bytes, which + // isn't callable from this context. It's checked by an assert when + // it's used though. 
+ frame_size_in_bytes = align_up(sp_offset * wordSize, 8); +} + + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + int call_offset; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + call_offset = __ call_RT(noreg, noreg, target, G4, G5); + } + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + __ should_not_reach_here(); + return oop_maps; +} + + +OopMapSet* Runtime1::generate_stub_call(StubAssembler* sasm, Register result, address target, + Register arg1, Register arg2, Register arg3) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + + int call_offset; + if (arg1 == noreg) { + call_offset = __ call_RT(result, noreg, target); + } else if (arg2 == noreg) { + call_offset = __ call_RT(result, noreg, target, arg1); + } else if (arg3 == noreg) { + call_offset = __ call_RT(result, noreg, target, arg1, arg2); + } else { + call_offset = __ call_RT(result, noreg, target, arg1, arg2, arg3); + } + OopMapSet* oop_maps = NULL; + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + + __ ret(); + __ delayed()->restore(); + + return oop_maps; +} + + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + + // call the runtime patching routine, returns non-zero if nmethod got deopted. 
+ int call_offset = __ call_RT(noreg, noreg, target); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + // re-execute the patched instruction or, if the nmethod was deoptimized, return to the + // deoptimization handler entry that will cause re-execution of the current bytecode + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + Label no_deopt; + __ br_null_short(O0, Assembler::pt, no_deopt); + + // return to the deoptimization handler entry for unpacking and re-execution; + // if we simply returned then we'd deopt as if any call we patched had just + // returned. + + restore_live_registers(sasm); + + AddressLiteral dest(deopt_blob->unpack_with_reexecution()); + __ jump_to(dest, O0); + __ delayed()->restore(); + + __ bind(no_deopt); + restore_live_registers(sasm); + __ ret(); + __ delayed()->restore(); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + OopMapSet* oop_maps = NULL; + // for better readability + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + // stub code & info for the different stubs + switch (id) { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register G5_klass = G5; // Incoming + Register O0_obj = O0; // Outgoing + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
+ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Label slow_path; + Register G1_obj_size = G1; + Register G3_t1 = G3; + Register G4_t2 = G4; + assert_different_registers(G5_klass, G1_obj_size, G3_t1, G4_t2); + + // Push a frame since we may do dtrace notification for the + // allocation which requires calling out and we don't want + // to stomp the real return address. + __ save_frame(0); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ ldub(G5_klass, in_bytes(InstanceKlass::init_state_offset()), G3_t1); + __ cmp(G3_t1, InstanceKlass::fully_initialized); + __ br(Assembler::notEqual, false, Assembler::pn, slow_path); + __ delayed()->nop(); + } +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); + // make sure it's an instance (LH > 0) + __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok); + __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size); + __ br(Assembler::zero, false, Assembler::pn, ok); + __ delayed()->nop(); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // If we got here then the TLAB allocation failed, so try allocating directly from eden. 
+ // get the instance size + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); + __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path); + __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2); + + __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ false); + __ verify_oop(O0_obj); + __ mov(O0, I0); + __ ret(); + __ delayed()->restore(); + + __ bind(slow_path); + + // pop this frame so generate_stub_call can push it's own + __ restore(); + } + + oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_instance), G5_klass); + // I0->O0: new instance + } + + break; + + case counter_overflow_id: + // G4 contains bci, G5 contains method + oop_maps = generate_stub_call(sasm, noreg, CAST_FROM_FN_PTR(address, counter_overflow), G4, G5); + break; + + case new_type_array_id: + case new_object_array_id: + { + Register G5_klass = G5; // Incoming + Register G4_length = G4; // Incoming + Register O0_obj = O0; // Outgoing + + Address klass_lh(G5_klass, Klass::layout_helper_offset()); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask == 0xFF, "bytewise"); + // Use this offset to pick out an individual byte of the layout_helper: + const int klass_lh_header_size_offset = ((BytesPerInt - 1) // 3 - 2 selects byte {0,1,0,0} + - Klass::_lh_header_size_shift / BitsPerByte); + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register G3_t1 = G3; + __ ld(klass_lh, G3_t1); + __ sra(G3_t1, Klass::_lh_array_tag_shift, G3_t1); + int tag = ((id == new_type_array_id) + ? 
Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ cmp_and_brx_short(G3_t1, tag, Assembler::equal, Assembler::pt, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + if (id == new_type_array_id) { + oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_type_array), G5_klass, G4_length); + } else { + oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_object_array), G5_klass, G4_length); + } + // I0 -> O0: new array + } + break; + + case new_multi_array_id: + { // O0: klass + // O1: rank + // O2: address of 1st dimension + __ set_info("new_multi_array", dont_gc_arguments); + oop_maps = generate_stub_call(sasm, I0, CAST_FROM_FN_PTR(address, new_multi_array), I0, I1, I2); + // I0 -> O0: new multi array + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = O1; + __ load_klass(O0, t); + __ ld(t, in_bytes(Klass::access_flags_offset()), t); + __ set(JVM_ACC_HAS_FINALIZER, G3); + __ andcc(G3, t, G0); + __ br(Assembler::notZero, false, Assembler::pt, register_finalizer); + __ delayed()->nop(); + + // do a leaf return + __ retl(); + __ delayed()->nop(); + + __ bind(register_finalizer); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, + CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), I0); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ ret(); + __ delayed()->restore(); + } + break; + + case throw_range_check_failed_id: + { __ set_info("range_check_failed", dont_gc_arguments); // arguments will be discarded + // G4: index + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case 
throw_index_exception_id: + { __ set_info("index_range_check_failed", dont_gc_arguments); // arguments will be discarded + // G4: index + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_div0_exception_id: + { __ set_info("throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { __ set_info("throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case handle_exception_id: + { __ set_info("handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { __ set_info("handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case unwind_exception_id: + { + // O0: exception + // I7: address of call to this method + + __ set_info("unwind_exception", dont_gc_arguments); + __ mov(Oexception, Oexception->after_save()); + __ add(I7, frame::pc_return_offset, Oissuing_pc->after_save()); + + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + G2_thread, Oissuing_pc->after_save()); + __ verify_not_null_oop(Oexception->after_save()); + + // Restore SP from L7 if the exception PC is a method handle call site. + __ mov(O0, G5); // Save the target address. + __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0); + __ tst(L0); // Condition codes are preserved over the restore. + __ restore(); + + __ jmp(G5, 0); + __ delayed()->movcc(Assembler::notZero, false, Assembler::icc, L7_mh_SP_save, SP); // Restore SP if required. 
+ } + break; + + case throw_array_store_exception_id: + { + __ set_info("throw_array_store_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + + case throw_class_cast_exception_id: + { + // G4: object + __ set_info("throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { + __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super ); + // Arguments : + // + // ret : G3 + // sub : G3, argument, destroyed + // super: G1, argument, not changed + // raddr: O7, blown by call + Label miss; + + __ save_frame(0); // Blow no registers! 
+ + __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss); + + __ mov(1, G3); + __ ret(); // Result in G5 is 'true' + __ delayed()->restore(); // free copy or add can go here + + __ bind(miss); + __ mov(0, G3); + __ ret(); // Result in G5 is 'false' + __ delayed()->restore(); // free copy or add can go here + } + + case monitorenter_nofpu_id: + case monitorenter_id: + { // G4: object + // G5: lock address + __ set_info("monitorenter", dont_gc_arguments); + + int save_fpu_registers = (id == monitorenter_id); + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm, save_fpu_registers); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), G4, G5); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm, save_fpu_registers); + + __ ret(); + __ delayed()->restore(); + } + break; + + case monitorexit_nofpu_id: + case monitorexit_id: + { // G4: lock address + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + __ set_info("monitorexit", dont_gc_arguments); + + int save_fpu_registers = (id == monitorexit_id); + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm, save_fpu_registers); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), G4); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm, save_fpu_registers); + + __ ret(); + __ delayed()->restore(); + } + break; + + case deoptimize_id: + { + __ set_info("deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), G4); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + 
restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + AddressLiteral dest(deopt_blob->unpack_with_reexecution()); + __ jump_to(dest, O0); + __ delayed()->restore(); + } + break; + + case access_field_patching_id: + { __ set_info("access_field_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { __ set_info("load_klass_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { __ set_info("load_mirror_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { __ set_info("load_appendix_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case dtrace_object_alloc_id: + { // O0: object + __ set_info("dtrace_object_alloc", dont_gc_arguments); + // we can't gc here so skip the oopmap but make sure that all + // the live registers get saved. 
+        save_live_registers(sasm);
+
+        __ save_thread(L7_thread_cache);
+        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
+                relocInfo::runtime_call_type);
+        __ delayed()->mov(I0, O0);
+        __ restore_thread(L7_thread_cache);
+
+        restore_live_registers(sasm);
+        __ ret();
+        __ delayed()->restore();
+      }
+      break;
+
+    case predicate_failed_trap_id:
+      {
+        __ set_info("predicate_failed_trap", dont_gc_arguments);
+        OopMap* oop_map = save_live_registers(sasm);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        restore_live_registers(sasm);
+
+        AddressLiteral dest(deopt_blob->unpack_with_reexecution());
+        __ jump_to(dest, O0);
+        __ delayed()->restore();
+      }
+      break;
+
+    default:
+      { __ set_info("unimplemented entry", dont_gc_arguments);
+        __ save_frame(0);
+        __ set((int)id, O1);
+        __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), O1);
+        __ should_not_reach_here();
+      }
+      break;
+  }
+  return oop_maps;
+}
+
+// Emits the exception-dispatch code shared by forward_exception_id, handle_exception_id and handle_exception_from_callee_id; returns the OopMapSet holding the GC map of its runtime call.
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
+  __ block_comment("generate_handle_exception");
+
+  // Save registers, if required, and get the exception oop / issuing pc into Oexception / Oissuing_pc.
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* oop_map = NULL;
+  switch (id) {
+  case forward_exception_id:
+    // We're handling an exception in the context of a compiled frame.
+    // The registers have been saved in the standard places. Perform
+    // an exception lookup in the caller and dispatch to the handler
+    // if found. Otherwise unwind and dispatch to the caller's
+    // exception handler.
+    oop_map = generate_oop_map(sasm, true);
+
+    // transfer the pending exception to the exception_oop
+    __ ld_ptr(G2_thread, in_bytes(JavaThread::pending_exception_offset()), Oexception);
+    __ ld_ptr(Oexception, 0, G0);  // loaded value is discarded (G0) -- presumably an implicit null check of the oop
+    __ st_ptr(G0, G2_thread, in_bytes(JavaThread::pending_exception_offset()));  // G0 is zero: clears the pending exception field
+    __ add(I7, frame::pc_return_offset, Oissuing_pc);
+    break;
+  case handle_exception_id:
+    // At this point all registers MAY be live.
+    oop_map = save_live_registers(sasm);
+    __ mov(Oexception->after_save(), Oexception);
+    __ mov(Oissuing_pc->after_save(), Oissuing_pc);
+    break;
+  case handle_exception_from_callee_id:
+    // At this point all registers except exception oop (Oexception)
+    // and exception pc (Oissuing_pc) are dead.
+    oop_map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
+    sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+    __ save_frame_c1(frame_size_in_bytes);
+    __ mov(Oexception->after_save(), Oexception);
+    __ mov(Oissuing_pc->after_save(), Oissuing_pc);
+    break;
+  default: ShouldNotReachHere();
+  }
+
+  __ verify_not_null_oop(Oexception);
+
+#ifdef ASSERT
+  // check that fields in JavaThread for exception oop and issuing pc are
+  // empty before writing to them
+  Label oop_empty;
+  Register scratch = I7; // We can use I7 here because it's overwritten later anyway.
+  __ ld_ptr(Address(G2_thread, JavaThread::exception_oop_offset()), scratch);
+  __ br_null(scratch, false, Assembler::pt, oop_empty);
+  __ delayed()->nop();
+  __ stop("exception oop already set");
+  __ bind(oop_empty);
+
+  Label pc_empty;
+  __ ld_ptr(Address(G2_thread, JavaThread::exception_pc_offset()), scratch);
+  __ br_null(scratch, false, Assembler::pt, pc_empty);
+  __ delayed()->nop();
+  __ stop("exception pc already set");
+  __ bind(pc_empty);
+#endif
+
+  // save the exception and issuing pc in the thread
+  __ st_ptr(Oexception, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
+  __ st_ptr(Oissuing_pc, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
+
+  // use the throwing pc as the return address to lookup (has bci & oop map)
+  __ mov(Oissuing_pc, I7);
+  __ sub(I7, frame::pc_return_offset, I7);
+  int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+  oop_maps->add_gc_map(call_offset, oop_map);  // the oop map chosen per stub id above describes the frame at this call
+
+  // Note: if nmethod has been deoptimized then regardless of
+  // whether it had a handler or not we will deoptimize
+  // by entering the deopt blob with a pending exception.
+
+  // Restore the registers that were saved at the beginning, remove
+  // the frame and jump to the exception handler.
+  switch (id) {
+  case forward_exception_id:
+  case handle_exception_id:
+    restore_live_registers(sasm);
+    __ jmp(O0, 0);  // O0 presumably holds the handler address from the call above (the callee case below saves it as "the target address")
+    __ delayed()->restore();
+    break;
+  case handle_exception_from_callee_id:
+    // Restore SP from L7 if the exception PC is a method handle call site.
+    __ mov(O0, G5); // Save the target address.
+    __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0);
+    __ tst(L0); // Condition codes are preserved over the restore.
+    __ restore();
+
+    __ jmp(G5, 0); // jump to the exception handler
+    __ delayed()->movcc(Assembler::notZero, false, Assembler::icc, L7_mh_SP_save, SP); // Restore SP if required.
+    break;
+  default: ShouldNotReachHere();
+  }
+
+  return oop_maps;
+}
+
+
+#undef __
+
+const char *Runtime1::pd_name_for_address(address entry) {
+  return "";  // 'entry' is not inspected: always returns the empty string
+}
diff -ur --new-file a/src/hotspot/cpu/sparc/c1_globals_sparc.hpp b/src/hotspot/cpu/sparc/c1_globals_sparc.hpp
--- a/src/hotspot/cpu/sparc/c1_globals_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/c1_globals_sparc.hpp 2023-04-16 11:42:11.059994661 +0000
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_C1_GLOBALS_SPARC_HPP
+#define CPU_SPARC_C1_GLOBALS_SPARC_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+
+#ifndef COMPILER2  // the defaults in this group are only defined here when COMPILER2 is not defined
+define_pd_global(bool,     BackgroundCompilation,        true );
+define_pd_global(bool,     CICompileOSR,                 true );
+define_pd_global(bool,     InlineIntrinsics,             true );
+define_pd_global(bool,     PreferInterpreterNativeStubs, false);
+define_pd_global(bool,     ProfileTraps,                 false);
+define_pd_global(bool,     UseOnStackReplacement,        true );
+define_pd_global(bool,     TieredCompilation,            false);
+define_pd_global(intx,     CompileThreshold,             1000 ); // Design center runs on 1.3.1
+
+define_pd_global(intx,     OnStackReplacePercentage,     1400 );
+define_pd_global(bool,     UseTLAB,                      true );
+define_pd_global(bool,     ProfileInterpreter,           false);
+define_pd_global(bool,     ResizeTLAB,                   true );
+define_pd_global(uintx,    ReservedCodeCacheSize,        32*M ); // equals the sum of the three code heap sizes below (13M + 14M + 5M)
+define_pd_global(uintx,    NonProfiledCodeHeapSize,      13*M );
+define_pd_global(uintx,    ProfiledCodeHeapSize,         14*M );
+define_pd_global(uintx,    NonNMethodCodeHeapSize,       5*M );
+define_pd_global(uintx,    CodeCacheExpansionSize,       32*K );
+define_pd_global(uintx,    CodeCacheMinBlockLength,      1);
+define_pd_global(uintx,    CodeCacheMinimumUseSpace,     400*K);
+define_pd_global(bool,     NeverActAsServerClassMachine, true );
+define_pd_global(size_t,   NewSizeThreadIncrease,        16*K );
+define_pd_global(uint64_t, MaxRAM,                       1ULL*G);
+define_pd_global(uintx,    InitialCodeCacheSize,         160*K);
+#endif // !COMPILER2
+
+define_pd_global(bool,     UseTypeProfile,               false);
+
+define_pd_global(bool,     OptimizeSinglePrecision,      false);
+define_pd_global(bool,     CSEArrayLength,               true );
+define_pd_global(bool,     TwoOperandLIRForm,            false);
+
+#endif // CPU_SPARC_C1_GLOBALS_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp
--- a/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp 2023-04-16 11:42:11.060229698 +0000
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "oops/arrayOop.hpp"
+#include "opto/c2_MacroAssembler.hpp"
+#include "opto/intrinsicnode.hpp"
+#include "runtime/stubRoutines.hpp"
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else  // !PRODUCT: note that STOP expands to two statements here, so it is only safe as a statement inside a braced block
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
+#endif
+
+// Compress char[] to byte[] by compressing 16 bytes (8 chars) at once. On a non-latin1 char, set result to 0 and branch to Ldone; otherwise fall through with result = original cnt.
+void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
+                                           Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                                           FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
+  Label Lloop, Lslow;
+  assert(UseVIS >= 3, "VIS3 is required");
+  assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
+  assert_different_registers(ftmp1, ftmp2, ftmp3);
+
+  // Check if cnt >= 8 (= 16 bytes)
+  cmp(cnt, 8);
+  br(Assembler::less, false, Assembler::pn, Lslow);
+  delayed()->mov(cnt, result); // copy count (delay slot is not annulled, so result = cnt on both paths)
+
+  // Check for 8-byte alignment of src and dst
+  or3(src, dst, tmp1);
+  andcc(tmp1, 7, G0);
+  br(Assembler::notZero, false, Assembler::pn, Lslow);
+  delayed()->nop();
+
+  // Set mask for bshuffle instruction
+  Register mask = tmp4;
+  set(0x13579bdf, mask); // byte selectors 1,3,5,...,f: the odd-numbered bytes of the 16 bytes in ftmp1:ftmp2
+  bmask(mask, G0, G0);
+
+  // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
+  Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
+  add(mask, 0x300, mask);             // mask = 0x0000 0000 ff00 ff00
+  sllx(mask, 32, tmp1);               // tmp1 = 0xff00 ff00 0000 0000
+  or3(mask, tmp1, mask);              // mask = 0xff00 ff00 ff00 ff00
+
+  // Load first 8 bytes
+  ldx(src, 0, tmp1);
+
+  bind(Lloop);
+  // Load next 8 bytes
+  ldx(src, 8, tmp2);
+
+  // Check for non-latin1 character by testing if the most significant byte of a char is set.
+  // Although we have to move the data between integer and floating point registers, this is
+  // still faster than the corresponding VIS instructions (ford/fand/fcmpd).
+  or3(tmp1, tmp2, tmp3);
+  btst(tmp3, mask);
+  // annul zeroing if branch is not taken to preserve original count
+  brx(Assembler::notZero, true, Assembler::pn, Ldone);
+  delayed()->mov(G0, result); // 0 - failed
+
+  // Move bytes into float register
+  movxtod(tmp1, ftmp1);
+  movxtod(tmp2, ftmp2);
+
+  // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
+  bshuffle(ftmp1, ftmp2, ftmp3);
+  stf(FloatRegisterImpl::D, ftmp3, dst, 0);
+
+  // Increment addresses and decrement count
+  inc(src, 16);
+  inc(dst, 8);
+  dec(cnt, 8);
+
+  cmp(cnt, 8);
+  // annul LDX if branch is not taken to prevent access past end of string
+  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
+  delayed()->ldx(src, 0, tmp1);
+
+  // Fallback to slow version (nothing more is emitted here; presumably the caller emits the slow version right after)
+  bind(Lslow);
+}
+
+// Compress char[] to byte[], one char per iteration. On a char > 0xff, set result to 0 and branch to Ldone; result is not written otherwise.
+void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
+  Label Lloop;
+  assert_different_registers(src, dst, cnt, tmp, result);
+
+  lduh(src, 0, tmp); // NOTE(review): no cnt == 0 check before this first load -- presumably guaranteed by the caller
+
+  bind(Lloop);
+  inc(src, sizeof(jchar));
+  cmp(tmp, 0xff);
+  // annul zeroing if branch is not taken to preserve original count
+  br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
+  delayed()->mov(G0, result); // 0 - failed
+  deccc(cnt);
+  stb(tmp, dst, 0);
+  inc(dst);
+  // annul LDUH if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  delayed()->lduh(src, 0, tmp); // hoisted
+}
+
+// Inflate byte[] to char[] by inflating 16 bytes at once.
+void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
+                                          FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
+  Label Lloop, Lslow; // NOTE: the Ldone parameter is not referenced in this function
+  assert(UseVIS >= 3, "VIS3 is required");
+  assert_different_registers(src, dst, cnt, tmp);
+  assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
+
+  // Check if cnt >= 8 (8 source bytes = 16 destination bytes per iteration)
+  cmp(cnt, 8);
+  br(Assembler::less, false, Assembler::pn, Lslow);
+  delayed()->nop();
+
+  // Check for 8-byte alignment of src and dst
+  or3(src, dst, tmp);
+  andcc(tmp, 7, G0);
+  br(Assembler::notZero, false, Assembler::pn, Lslow);
+  // Initialize float register to zero (in the non-annulled delay slot, so on both paths)
+  FloatRegister zerof = ftmp4;
+  delayed()->fzero(FloatRegisterImpl::D, zerof);
+
+  // Load first 8 bytes
+  ldf(FloatRegisterImpl::D, src, 0, ftmp1);
+
+  bind(Lloop);
+  inc(src, 8);
+  dec(cnt, 8);
+
+  // Inflate the string by interleaving each byte from the source array
+  // with a zero byte and storing the result in the destination array.
+  fpmerge(zerof, ftmp1->successor(), ftmp2); // second half of the loaded doubleword ...
+  stf(FloatRegisterImpl::D, ftmp2, dst, 8);  // ... goes to dst + 8
+  fpmerge(zerof, ftmp1, ftmp3);              // first half of the loaded doubleword ...
+  stf(FloatRegisterImpl::D, ftmp3, dst, 0);  // ... goes to dst + 0
+
+  inc(dst, 16);
+
+  cmp(cnt, 8);
+  // annul LDF if branch is not taken to prevent access past end of string
+  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
+  delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
+
+  // Fallback to slow version (nothing more is emitted here; presumably the caller emits the slow version right after)
+  bind(Lslow);
+}
+
+// Inflate byte[] to char[], one byte per iteration.
+void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
+  Label Loop; // NOTE: the Ldone parameter is not referenced in this function
+  assert_different_registers(src, dst, cnt, tmp);
+
+  ldub(src, 0, tmp); // NOTE(review): no cnt == 0 check before this first load -- presumably guaranteed by the caller
+  bind(Loop);
+  inc(src);
+  deccc(cnt);
+  sth(tmp, dst, 0);
+  inc(dst, sizeof(jchar));
+  // annul LDUB if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Loop);
+  delayed()->ldub(src, 0, tmp); // hoisted
+}
+
+void C2_MacroAssembler::string_compare(Register str1, Register str2,
+                                       Register cnt1, Register cnt2,
+                                       Register tmp1, Register tmp2,
+                                       Register result, int ae) {
+  Label Ldone, Lloop;
+  assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
+  int stride1, stride2;
+  // Compares str1 and str2 ('ae' = LL/UU/LU/UL encoding pair): result = difference of the first mismatching chars, else the length difference.
+  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
+  // we interchange str1 and str2 in the UL case and negate the result.
+  // Like this, str1 is always latin1 encoded, except for the UU case.
+
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    srl(cnt2, 1, cnt2); // str2 holds 2-byte chars in the mixed cases: halve its count so it is comparable with cnt1
+  }
+
+  // See if the lengths are different, and calculate min in cnt1.
+  // Save diff in case we need it for a tie-breaker.
+  Label Lskip;
+  Register diff = tmp1;
+  subcc(cnt1, cnt2, diff);
+  br(Assembler::greater, true, Assembler::pt, Lskip);
+  // cnt2 is shorter, so use its count:
+  delayed()->mov(cnt2, cnt1);
+  bind(Lskip);
+
+  // Rename registers
+  Register limit1 = cnt1;
+  Register limit2 = limit1;
+  Register chr1 = result;
+  Register chr2 = cnt2;
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    // We need an additional register to keep track of two limits
+    assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
+    limit2 = tmp2;
+  }
+
+  // Is the minimum length zero?
+  cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
+  br(Assembler::equal, true, Assembler::pn, Ldone);
+  // result is difference in lengths
+  if (ae == StrIntrinsicNode::UU) {
+    delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
+  } else {
+    delayed()->mov(diff, result);
+  }
+
+  // Load first characters
+  if (ae == StrIntrinsicNode::LL) {
+    stride1 = stride2 = sizeof(jbyte);
+    ldub(str1, 0, chr1);
+    ldub(str2, 0, chr2);
+  } else if (ae == StrIntrinsicNode::UU) {
+    stride1 = stride2 = sizeof(jchar);
+    lduh(str1, 0, chr1);
+    lduh(str2, 0, chr2);
+  } else {
+    stride1 = sizeof(jbyte);
+    stride2 = sizeof(jchar);
+    ldub(str1, 0, chr1);
+    lduh(str2, 0, chr2);
+  }
+
+  // Compare first characters
+  subcc(chr1, chr2, chr1);
+  br(Assembler::notZero, false, Assembler::pt, Ldone);
+  assert(chr1 == result, "result must be pre-placed");
+  delayed()->nop();
+
+  // Check if the strings start at same location
+  cmp(str1, str2);
+  brx(Assembler::equal, true, Assembler::pn, Ldone);
+  delayed()->mov(G0, result); // result is zero
+
+  // We have no guarantee that on 64 bit the higher half of limit is 0
+  signx(limit1);
+
+  // Get limit
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    sll(limit1, 1, limit2); // limit2 = 2 * limit1: byte length of the compared part of str2
+    subcc(limit2, stride2, chr2);
+  }
+  subcc(limit1, stride1, chr1);
+  br(Assembler::zero, true, Assembler::pn, Ldone);
+  // result is difference in lengths
+  if (ae == StrIntrinsicNode::UU) {
+    delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
+  } else {
+    delayed()->mov(diff, result);
+  }
+
+  // Shift str1 and str2 to the end of the arrays, negate limit
+  add(str1, limit1, str1);
+  add(str2, limit2, str2);
+  neg(chr1, limit1); // limit1 = -(limit1-stride1)
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    neg(chr2, limit2); // limit2 = -(limit2-stride2)
+  }
+
+  // Compare the rest of the characters
+  load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
+
+  bind(Lloop);
+  load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
+
+  subcc(chr1, chr2, chr1);
+  br(Assembler::notZero, false, Assembler::pt, Ldone);
+  assert(chr1 == result, "result must be pre-placed");
+  delayed()->inccc(limit1, stride1);
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    inccc(limit2, stride2); // in the mixed cases the loop condition below is taken from limit2
+  }
+
+  // annul the load of the next str1 char if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
+
+  // If strings are equal up to min length, return the length difference.
+  if (ae == StrIntrinsicNode::UU) {
+    // Divide by 2 to get number of chars
+    sra(diff, 1, result);
+  } else {
+    mov(diff, result);
+  }
+
+  // Otherwise, return the difference between the first mismatched chars.
+  bind(Ldone);
+  if(ae == StrIntrinsicNode::UL) {
+    // Negate result (see note above)
+    neg(result);
+  }
+}
+
+void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
+                                     Register limit, Register tmp, Register result, bool is_byte) {
+  Label Ldone, Lloop, Lremaining;
+  assert_different_registers(ary1, ary2, limit, tmp, result);
+  // Emits an equality test of two arrays / memory ranges: result = 1 if equal, 0 otherwise.
+  int length_offset = arrayOopDesc::length_offset_in_bytes();
+  int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
+  assert(base_offset % 8 == 0, "Base offset must be 8-byte aligned");
+
+  if (is_array_equ) { // ary1/ary2 are array oops: check identity, NULL and length first
+    // return true if the same array
+    cmp(ary1, ary2);
+    brx(Assembler::equal, true, Assembler::pn, Ldone);
+    delayed()->mov(1, result); // equal
+
+    br_null(ary1, true, Assembler::pn, Ldone);
+    delayed()->clr(result); // not equal
+
+    br_null(ary2, true, Assembler::pn, Ldone);
+    delayed()->clr(result); // not equal
+
+    // load the lengths of arrays
+    ld(Address(ary1, length_offset), limit);
+    ld(Address(ary2, length_offset), tmp);
+
+    // return false if the two arrays are not equal length
+    cmp(limit, tmp);
+    br(Assembler::notEqual, true, Assembler::pn, Ldone);
+    delayed()->clr(result); // not equal
+  }
+
+  cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
+  delayed()->mov(1, result); // zero-length arrays are equal
+
+  if (is_array_equ) {
+    // load array addresses
+    add(ary1, base_offset, ary1);
+    add(ary2, base_offset, ary2);
+    // set byte count
+    if (!is_byte) {
+      sll(limit, exact_log2(sizeof(jchar)), limit);
+    }
+  } else { // ary1/ary2 and limit are used as given (limit is taken as a byte count)
+    // We have no guarantee that on 64 bit the higher half of limit is 0
+    signx(limit);
+  }
+
+#ifdef ASSERT
+  // Sanity check for doubleword (8-byte) alignment of ary1 and ary2.
+  // Guaranteed on 64-bit systems (see arrayOopDesc::header_size_in_bytes()).
+  Label Laligned;
+  or3(ary1, ary2, tmp);
+  andcc(tmp, 7, tmp);
+  br_null_short(tmp, Assembler::pn, Laligned);
+  STOP("First array element is not 8-byte aligned.");
+  should_not_reach_here();
+  bind(Laligned);
+#endif
+
+  // Shift ary1 and ary2 to the end of the arrays, negate limit
+  add(ary1, limit, ary1);
+  add(ary2, limit, ary2);
+  neg(limit, limit);
+
+  // MAIN LOOP
+  // Load and compare 8 bytes at a time (ldx) until the loaded doublewords are not
+  // equal or we reached the end of the arrays. If the size of the arrays is not a
+  // multiple of 8 bytes, we simply read over the end of the array, bail out and
+  // compare the remaining bytes below by skipping the garbage bytes.
+  ldx(ary1, limit, result);
+  bind(Lloop);
+  ldx(ary2, limit, tmp);
+  inccc(limit, 8);
+  // Bail out if we reached the end (but still do the comparison)
+  br(Assembler::positive, false, Assembler::pn, Lremaining);
+  delayed()->cmp(result, tmp);
+  // Check equality of elements
+  brx(Assembler::equal, false, Assembler::pt, target(Lloop));
+  delayed()->ldx(ary1, limit, result);
+
+  ba(Ldone);
+  delayed()->clr(result); // not equal
+
+  // TAIL COMPARISON
+  // We got here because we reached the end of the arrays. 'limit' is the number of
+  // garbage bytes we may have compared by reading over the end of the arrays. Shift
+  // out the garbage and compare the remaining elements.
+  bind(Lremaining);
+  // Optimistic shortcut: elements potentially including garbage are equal
+  brx(Assembler::equal, true, Assembler::pt, target(Ldone));
+  delayed()->mov(1, result); // equal
+  // Shift 'limit' bytes to the right and compare
+  sll(limit, 3, limit); // bytes to bits
+  srlx(result, limit, result);
+  srlx(tmp, limit, tmp);
+  cmp(result, tmp);
+  clr(result);
+  movcc(Assembler::equal, false, xcc, 1, result);
+
+  bind(Ldone);
+}
+
+void C2_MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
+
+  // Emits a test for negative bytes (bytes with the high bit set, cf. the 0x80.. mask below) among the 'size' bytes at inp;
+  // result 1 if found, 0 otherwise. Works on aligned 8B loads, so bytes outside the string may be read but are shifted out.
+
+  Label Lcore, Ltail, Lreturn, Lcore_rpt;
+
+  assert_different_registers(inp, size, t2, t3, t4, t5, result);
+
+  Register i = result; // result used as integer index i until very end
+  Register lmask = t2; // t2 is aliased to lmask
+
+  // INITIALIZATION
+  // ===========================================================
+  // initialize highbits mask -> lmask = 0x8080808080808080 (8B/64b)
+  // compute unaligned offset -> i
+  // compute core end index -> t5
+  Assembler::sethi(0x80808000, t2); //! sethi macro fails to emit optimal
+  add(t2, 0x80, t2);
+  sllx(t2, 32, t3);
+  or3(t3, t2, lmask); // 0x8080808080808080 -> lmask
+  sra(size,0,size); // shift by 0: sign-extends the low 32 bits of size
+  andcc(inp, 0x7, i); // unaligned offset -> i
+  br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
+  delayed()->add(size, -8, t5); // (annulled) core end index -> t5
+
+  // ===========================================================
+
+  // UNALIGNED HEAD
+  // ===========================================================
+  // * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
+  // * obliterate (ignore) bytes outside string by shifting off reg ends
+  // * compare with bitmask, short circuit return true if one or more high
+  // bits set.
+  cmp(size, 0);
+  br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
+  delayed()->mov(0,result); // annulled so i not clobbered for following
+  neg(i, t4);
+  add(i, size, t5);
+  ldx(inp, t4, t3); // raw aligned 8B containing unaligned head -> t3
+  mov(8, t4);
+  sub(t4, t5, t4); // t4 = 8 - (i + size)
+  sra(t4, 31, t5); // t5 = all ones if t4 is negative, else 0
+  andn(t4, t5, t5); // t5 = max(8 - (i + size), 0): bytes of this 8B that lie past the end of the string
+  add(i, t5, t4);
+  sll(t5, 3, t5);
+  sll(t4, 3, t4); // # bits to shift right, left -> t5,t4
+  srlx(t3, t5, t3);
+  sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
+  andcc(lmask, t3, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
+  delayed()->mov(1,result); // annulled so i not clobbered for following
+  add(size, -8, t5); // core end index -> t5
+  mov(8, t4);
+  sub(t4, i, i); // # bytes examined in unaligned head (<8) -> i
+  // ===========================================================
+
+  // ALIGNED CORE
+  // ===========================================================
+  // * iterate index i over aligned 8B sections of core, comparing with
+  // bitmask, short circuit return true if one or more high bits set
+  // t5 contains core end index/loop limit which is the index
+  // of the MSB of last (unaligned) 8B fully contained in the string.
+  // inp contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Lcore);
+  cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
+  bind(Lcore_rpt);
+  ldx(inp, i, t3);
+  andcc(t3, lmask, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn);
+  delayed()->mov(1, result); // annulled so i not clobbered for following
+  add(i, 8, i);
+  cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
+  // ===========================================================
+
+  // ALIGNED TAIL (<8B)
+  // ===========================================================
+  // handle aligned tail of 7B or less as complete 8B, obliterating end of
+  // string bytes by shifting them off end, compare what's left with bitmask
+  // inp contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Ltail);
+  subcc(size, i, t4); // # of remaining bytes in string -> t4
+  // return 0 if no more remaining bytes
+  br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
+  delayed()->mov(0, result); // annulled so i not clobbered for following
+  ldx(inp, i, t3); // load final 8B (aligned) containing tail -> t3
+  mov(8, t5);
+  sub(t5, t4, t4);
+  mov(0, result); // ** i clobbered at this point
+  sll(t4, 3, t4); // bits beyond end of string -> t4
+  srlx(t3, t4, t3); // bytes beyond end now obliterated -> t3
+  andcc(lmask, t3, G0);
+  movcc(Assembler::notZero, false, xcc, 1, result);
+  bind(Lreturn);
+}
+
diff -ur --new-file a/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp b/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp
--- a/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp 2023-04-16 11:42:11.060337196 +0000
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP +#define CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP + +// C2_MacroAssembler contains high-level macros for C2 + + public: + // Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure. + void string_compress_16(Register src, Register dst, Register cnt, Register result, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone); + + // Compress char[] to byte[]. Return 0 on failure. + void string_compress(Register src, Register dst, Register cnt, Register tmp, Register result, Label& Ldone); + + // Inflate byte[] to char[] by inflating 16 bytes at once. + void string_inflate_16(Register src, Register dst, Register cnt, Register tmp, + FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone); + + // Inflate byte[] to char[]. 
+ void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone); + + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, + Register tmp1, Register tmp2, + Register result, int ae); + + void array_equals(bool is_array_equ, Register ary1, Register ary2, + Register limit, Register tmp, Register result, bool is_byte); + // test for negative bytes in input string of a given size, result 0 if none + void has_negatives(Register inp, Register size, Register result, + Register t2, Register t3, Register t4, + Register t5); + +#endif // CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c2_globals_sparc.hpp b/src/hotspot/cpu/sparc/c2_globals_sparc.hpp --- a/src/hotspot/cpu/sparc/c2_globals_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c2_globals_sparc.hpp 2023-04-16 11:42:11.060453552 +0000 @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SPARC_C2_GLOBALS_SPARC_HPP +#define CPU_SPARC_C2_GLOBALS_SPARC_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. + +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, ProfileInterpreter, true); +define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); +define_pd_global(intx, CompileThreshold, 10000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 4); +define_pd_global(intx, FLOATPRESSURE, 52); // C2 on V9 gets to use all the float/double registers +define_pd_global(intx, FreqInlineSize, 175); +define_pd_global(intx, INTPRESSURE, 48); // large register set +define_pd_global(intx, InteriorEntryAlignment, 16); // = CodeEntryAlignment +define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, RegisterCostAreaRatio, 12000); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1 +define_pd_global(intx, LoopPercentProfileLimit, 10); +define_pd_global(intx, MinJumpTableSize, 5); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. 
+define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoScheduling, true); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); +define_pd_global(bool, IdealizeClearArrayNode, true); + +// We need to make sure that all generated code is within +// 2 gigs of the libjvm.so runtime routines so we can use +// the faster "call" instruction rather than the expensive +// sequence of instructions to load a 64 bit pointer. +// +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(uintx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(uintx, ReservedCodeCacheSize, 48*M); +define_pd_global(uintx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(uintx, ProfiledCodeHeapSize, 22*M); +define_pd_global(uintx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(uintx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(uintx, CodeCacheMinBlockLength, 6); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on sparc. + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_SPARC_C2_GLOBALS_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/c2_init_sparc.cpp b/src/hotspot/cpu/sparc/c2_init_sparc.cpp --- a/src/hotspot/cpu/sparc/c2_init_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/c2_init_sparc.cpp 2023-04-16 11:42:11.060547394 +0000 @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for sparc + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); +} diff -ur --new-file a/src/hotspot/cpu/sparc/codeBuffer_sparc.hpp b/src/hotspot/cpu/sparc/codeBuffer_sparc.hpp --- a/src/hotspot/cpu/sparc/codeBuffer_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/codeBuffer_sparc.hpp 2023-04-16 11:42:11.060640036 +0000 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_CODEBUFFER_SPARC_HPP +#define CPU_SPARC_CODEBUFFER_SPARC_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_SPARC_CODEBUFFER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/compiledIC_sparc.cpp b/src/hotspot/cpu/sparc/compiledIC_sparc.cpp --- a/src/hotspot/cpu/sparc/compiledIC_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/compiledIC_sparc.cpp 2023-04-16 11:42:11.060762677 +0000 @@ -0,0 +1,149 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" +#ifdef COMPILER2 +#include "opto/matcher.hpp" +#endif + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + // Stub is fixed up when the corresponding call is converted from calling + // compiled code to calling interpreted code. + // set (empty), G5 + // jmp -1 + + if (mark == NULL) { + mark = cbuf.insts_mark(); // Get mark within main instrs section. + } + + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(to_interp_stub_size()); + if (base == NULL) { + return NULL; // CodeBuffer::expand failed. + } + + // Static stub relocation stores the instruction address of the call. + __ relocate(static_stub_Relocation::spec(mark)); + + __ set_metadata(NULL, as_Register(Matcher::inline_cache_reg_encode())); + + __ set_inst_mark(); + AddressLiteral addrlit(-1); + __ JUMP(addrlit, G3, 0); + + __ delayed()->nop(); + + assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size"); + + // Update current stubs pointer and restore code_end. + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_trampoline_stub_size() { + // SPARC doesn't use trampolines. 
+ return 0; +} + +int CompiledStaticCall::to_interp_stub_size() { + // This doesn't need to be accurate but it must be larger or equal to + // the real size of the stub. + return (NativeMovConstReg::instruction_size + // sethi/setlo; + NativeJump::instruction_size); // sethi; jmp; nop +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 10; // 4 in emit_java_to_interp + 1 in Java_Static_Call +} + +void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + verify_mt_safe(callee, entry, method_holder, jump); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + method_holder->set_data(0); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledDirectStaticCall::verify() { + // Verify call. 
+ _call->verify(); + _call->verify_alignment(); + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff -ur --new-file a/src/hotspot/cpu/sparc/copy_sparc.hpp b/src/hotspot/cpu/sparc/copy_sparc.hpp --- a/src/hotspot/cpu/sparc/copy_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/copy_sparc.hpp 2023-04-16 11:42:11.060898300 +0000 @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_COPY_SPARC_HPP +#define CPU_SPARC_COPY_SPARC_HPP + +// Inline functions for memory copy and fill. 
+ +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(const void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + 
while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((const oop*)from, (oop*)to, count); +} + +static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { + // Do better than this: inline memmove body NEEDS CLEANUP + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((const jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_oops_atomic((const oop*)from, (oop*)to, count); +} + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + guarantee(mask_bits((uintptr_t)tohw, right_n_bits(LogBytesPerLong)) == 0, + "unaligned fill words"); + julong* to = (julong*)tohw; + julong v = ((julong)value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +typedef void (*_zero_Fn)(HeapWord* to, size_t count); + +// Only used for heap objects, so align_object_offset. +// All other platforms pd_fill_to_aligned_words simply calls pd_fill_to_words, don't +// know why this one is different. 
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation"); + + julong* to = (julong*)tohw; + julong v = ((julong)value << 32) | value; + // If count is odd, odd will be equal to 1 on 32-bit platform + // and be equal to 0 on 64-bit platform. + size_t odd = count % (BytesPerLong / HeapWordSize) ; + + size_t aligned_count = align_object_offset(count - odd) / HeapWordsPerLong; + julong* end = ((julong*)tohw) + aligned_count - 1; + while (to <= end) { + DEBUG_ONLY(count -= BytesPerLong / HeapWordSize ;) + *to++ = v; + } + assert(count == odd, "bad bounds on loop filling to aligned words"); + if (odd) { + *((juint*)to) = value; + + } +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +#endif // CPU_SPARC_COPY_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/disassembler_sparc.hpp b/src/hotspot/cpu/sparc/disassembler_sparc.hpp --- a/src/hotspot/cpu/sparc/disassembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/disassembler_sparc.hpp 2023-04-16 11:42:11.061207519 +0000 @@ -0,0 +1,56 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_DISASSEMBLER_SPARC_HPP +#define CPU_SPARC_DISASSEMBLER_SPARC_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "v9only"; + } + + // Returns address of n-th instruction preceding addr, + // NULL if no preceding instruction can be found. + // With SPARC being a RISC architecture, this always is BytesPerInstWord + // It might be beneficial to check "is_readable" as we do on ppc and s390. + static address find_prev_instr(address addr, int n_instr) { + return addr - BytesPerInstWord*n_instr; + } + + // special-case instruction decoding. + // There may be cases where the binutils disassembler doesn't do + // the perfect job. In those cases, decode_instruction0 may kick in + // and do it right. 
+ // If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" + static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { + return here; + } + + // platform-specific instruction annotations (like value of loaded constants) + static void annotate(address pc, outputStream* st) { }; + +#endif // CPU_SPARC_DISASSEMBLER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/frame_sparc.cpp b/src/hotspot/cpu/sparc/frame_sparc.cpp --- a/src/hotspot/cpu/sparc/frame_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/frame_sparc.cpp 2023-04-16 11:42:11.061547266 +0000 @@ -0,0 +1,830 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_sparc.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +void RegisterMap::pd_clear() { + if (_thread->has_last_Java_frame()) { + frame fr = _thread->last_frame(); + _window = fr.sp(); + } else { + _window = NULL; + } + _younger_window = NULL; +} + + +// Unified register numbering scheme: each 32-bits counts as a register +// number, so all the V9 registers take 2 slots. 
+const static int R_L_nums[] = {0+040,2+040,4+040,6+040,8+040,10+040,12+040,14+040}; +const static int R_I_nums[] = {0+060,2+060,4+060,6+060,8+060,10+060,12+060,14+060}; +const static int R_O_nums[] = {0+020,2+020,4+020,6+020,8+020,10+020,12+020,14+020}; +const static int R_G_nums[] = {0+000,2+000,4+000,6+000,8+000,10+000,12+000,14+000}; +static RegisterMap::LocationValidType bad_mask = 0; +static RegisterMap::LocationValidType R_LIO_mask = 0; +static bool register_map_inited = false; + +static void register_map_init() { + if (!register_map_inited) { + register_map_inited = true; + int i; + for (i = 0; i < 8; i++) { + assert(R_L_nums[i] < RegisterMap::location_valid_type_size, "in first chunk"); + assert(R_I_nums[i] < RegisterMap::location_valid_type_size, "in first chunk"); + assert(R_O_nums[i] < RegisterMap::location_valid_type_size, "in first chunk"); + assert(R_G_nums[i] < RegisterMap::location_valid_type_size, "in first chunk"); + } + + bad_mask |= (1LL << R_O_nums[6]); // SP + bad_mask |= (1LL << R_O_nums[7]); // cPC + bad_mask |= (1LL << R_I_nums[6]); // FP + bad_mask |= (1LL << R_I_nums[7]); // rPC + bad_mask |= (1LL << R_G_nums[2]); // TLS + bad_mask |= (1LL << R_G_nums[7]); // reserved by libthread + + for (i = 0; i < 8; i++) { + R_LIO_mask |= (1LL << R_L_nums[i]); + R_LIO_mask |= (1LL << R_I_nums[i]); + R_LIO_mask |= (1LL << R_O_nums[i]); + } + } +} + + +address RegisterMap::pd_location(VMReg regname) const { + register_map_init(); + + assert(regname->is_reg(), "sanity check"); + // Only the GPRs get handled this way + if( !regname->is_Register()) + return NULL; + + // don't talk about bad registers + if ((bad_mask & ((LocationValidType)1 << regname->value())) != 0) { + return NULL; + } + + // Convert to a GPR + Register reg; + int second_word = 0; + // 32-bit registers for in, out and local + if (!regname->is_concrete()) { + // HMM ought to return NULL for any non-concrete (odd) vmreg + // this all tied up in the fact we put out double oopMaps for + // 
register locations. When that is fixed we'd will return NULL + // (or assert here). + reg = regname->prev()->as_Register(); + second_word = sizeof(jint); + } else { + reg = regname->as_Register(); + } + if (reg->is_out()) { + return _younger_window == NULL ? NULL : + second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()]; + } + if (reg->is_local() || reg->is_in()) { + assert(_window != NULL, "Window should be available"); + return second_word + (address)&_window[reg->sp_offset_in_saved_window()]; + } + // Only the window'd GPRs get handled this way; not the globals. + return NULL; +} + + +#ifdef ASSERT +void RegisterMap::check_location_valid() { + register_map_init(); + assert((_location_valid[0] & bad_mask) == 0, "cannot have special locations for SP,FP,TLS,etc."); +} +#endif + +// We are shifting windows. That means we are moving all %i to %o, +// getting rid of all current %l, and keeping all %g. This is only +// complicated if any of the location pointers for these are valid. +// The normal case is that everything is in its standard register window +// home, and _location_valid[0] is zero. In that case, this routine +// does exactly nothing. +void RegisterMap::shift_individual_registers() { + if (!update_map()) return; // this only applies to maps with locations + register_map_init(); + check_location_valid(); + + LocationValidType lv = _location_valid[0]; + LocationValidType lv0 = lv; + + lv &= ~R_LIO_mask; // clear %l, %o, %i regs + + // if we cleared some non-%g locations, we may have to do some shifting + if (lv != lv0) { + // copy %i0-%i5 to %o0-%o5, if they have special locations + // This can happen in within stubs which spill argument registers + // around a dynamic link operation, such as resolve_opt_virtual_call. 
+ for (int i = 0; i < 8; i++) { + if (lv0 & (1LL << R_I_nums[i])) { + _location[R_O_nums[i]] = _location[R_I_nums[i]]; + lv |= (1LL << R_O_nums[i]); + } + } + } + + _location_valid[0] = lv; + check_location_valid(); +} + +bool frame::safe_for_sender(JavaThread *thread) { + + address _SP = (address) sp(); + address _FP = (address) fp(); + address _UNEXTENDED_SP = (address) unextended_sp(); + + // consider stack guards when trying to determine "safe" stack pointers + // sp must be within the usable part of the stack (not in guards) + if (!thread->is_in_usable_stack(_SP)) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + if (!thread->is_in_stack_range_incl(_UNEXTENDED_SP, _SP)) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + bool fp_safe = thread->is_in_stack_range_excl(_FP, _SP); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. 
+ return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* younger_sp = sp(); + intptr_t* _SENDER_SP = sender_sp(); // sender is actually just _FP + bool adjusted_stack = is_interpreted_frame(); + + address sender_pc = (address)younger_sp[I7->sp_offset_in_saved_window()] + pc_return_offset; + + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // It should be safe to construct the sender though it might not be valid + + frame sender(_SENDER_SP, younger_sp, adjusted_stack); + + // Do we have a valid fp? + address sender_fp = (address) sender.fp(); + + // an fp must be within the stack and above (but not equal) current frame's _FP + + if (!thread->is_in_stack_range_excl(sender_fp, _FP)) { + return false; + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + return sender.is_interpreted_frame_valid(thread); + } + + // Could just be some random pointer within the codeBlob + if (!sender.cb()->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + if (sender.is_entry_frame()) { + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + return thread->is_in_stack_range_excl(jcw, sender_fp); + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because you must allocate window space + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // The sender should positively 
be an nmethod or call_stub. On sparc we might in fact see something else. + // The cause of this is because at a save instruction the O7 we get is a leftover from an earlier + // window use. So if a runtime stub creates two frames (common in fastdebug/debug) then we see the + // stale pc. So if the sender blob is not something we'd expect we have little choice but to declare + // the stack unwalkable. pd_get_top_frame_for_signal_handler tries to recover from this by unwinding + // that initial frame and retrying. + + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + + return true; + + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) return false; + + // could try and do some more potential verification of native frame if we could think of some... 
+ + return true; +} + +// constructors + +// Construct an unpatchable, deficient frame +void frame::init(intptr_t* sp, address pc, CodeBlob* cb) { + assert( (((intptr_t)sp & (wordSize-1)) == 0), "frame constructor passed an invalid sp"); + _sp = sp; + _younger_sp = NULL; + _pc = pc; + _cb = cb; + _sp_adjustment_by_callee = 0; + assert(pc == NULL && cb == NULL || pc != NULL, "can't have a cb and no pc!"); + if (_cb == NULL && _pc != NULL ) { + _cb = CodeCache::find_blob(_pc); + } + _deopt_state = unknown; +} + +frame::frame(intptr_t* sp, unpatchable_t, address pc, CodeBlob* cb) { + init(sp, pc, cb); +} + +frame::frame(intptr_t* sp, intptr_t* younger_sp, bool younger_frame_is_interpreted) : + _sp(sp), + _deopt_state(unknown), + _younger_sp(younger_sp), + _sp_adjustment_by_callee(0) { + if (younger_sp == NULL) { + // make a deficient frame which doesn't know where its PC is + _pc = NULL; + _cb = NULL; + } else { + _pc = (address)younger_sp[I7->sp_offset_in_saved_window()] + pc_return_offset; + assert( (intptr_t*)younger_sp[FP->sp_offset_in_saved_window()] == (intptr_t*)((intptr_t)sp - STACK_BIAS), "younger_sp must be valid"); + // Any frame we ever build should always be "safe"; therefore we should not have to call + // find_blob_unsafe + // In case of native stubs, the pc retrieved here might be + // wrong. (the _last_native_pc will have the right value) + // So do not add any asserts on the _pc here. + } + + if (_pc != NULL) + _cb = CodeCache::find_blob(_pc); + + // Check for MethodHandle call sites. + if (_cb != NULL) { + CompiledMethod* nm = _cb->as_compiled_method_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(_pc) || nm->is_method_handle_return(_pc)) { + _sp_adjustment_by_callee = (intptr_t*) ((intptr_t) sp[L7_mh_SP_save->sp_offset_in_saved_window()] + STACK_BIAS) - sp; + // The SP is already adjusted by this MH call site, don't + // overwrite this value with the wrong interpreter value. 
+ younger_frame_is_interpreted = false; + } + } + } + + if (younger_frame_is_interpreted) { + // compute adjustment to this frame's SP made by its interpreted callee + _sp_adjustment_by_callee = (intptr_t*) ((intptr_t) younger_sp[I5_savedSP->sp_offset_in_saved_window()] + STACK_BIAS) - sp; + } + + // It is important that the frame is fully constructed when we do + // this lookup as get_deopt_original_pc() needs a correct value for + // unextended_sp() which uses _sp_adjustment_by_callee. + if (_pc != NULL) { + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } + } +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (address)pc, NULL); +} + +extern "C" void findpc(intptr_t x); + +void frame::pd_ps() { + intptr_t* curr_sp = sp(); + intptr_t* prev_sp = curr_sp - 1; + intptr_t *pc = NULL; + intptr_t *next_pc = NULL; + int count = 0; + tty->print_cr("register window backtrace from " INTPTR_FORMAT ":", p2i(curr_sp)); + while (curr_sp != NULL && ((intptr_t)curr_sp & 7) == 0 && curr_sp > prev_sp && curr_sp < prev_sp+1000) { + pc = next_pc; + next_pc = (intptr_t*) curr_sp[I7->sp_offset_in_saved_window()]; + tty->print("[%d] curr_sp=" INTPTR_FORMAT " pc=", count, p2i(curr_sp)); + findpc((intptr_t)pc); + if (WizardMode && Verbose) { + // print register window contents also + tty->print_cr(" L0..L7: {" + INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT " " + INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT " ", + curr_sp[0+0], curr_sp[0+1], curr_sp[0+2], curr_sp[0+3], + curr_sp[0+4], curr_sp[0+5], curr_sp[0+6], curr_sp[0+7]); + tty->print_cr(" I0..I7: {" + INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT " " + INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT " " 
INTPTR_FORMAT " ", + curr_sp[8+0], curr_sp[8+1], curr_sp[8+2], curr_sp[8+3], + curr_sp[8+4], curr_sp[8+5], curr_sp[8+6], curr_sp[8+7]); + // (and print stack frame contents too??) + + CodeBlob *b = CodeCache::find_blob((address) pc); + if (b != NULL) { + if (b->is_nmethod()) { + Method* m = ((nmethod*)b)->method(); + int nlocals = m->max_locals(); + int nparams = m->size_of_parameters(); + tty->print_cr("compiled java method (locals = %d, params = %d)", nlocals, nparams); + } + } + } + prev_sp = curr_sp; + curr_sp = (intptr_t *)curr_sp[FP->sp_offset_in_saved_window()]; + curr_sp = (intptr_t *)((intptr_t)curr_sp + STACK_BIAS); + count += 1; + } + if (curr_sp != NULL) + tty->print("[%d] curr_sp=" INTPTR_FORMAT " [bogus sp!]", count, p2i(curr_sp)); +} + +#endif // PRODUCT + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +// sender_sp + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return fp(); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Unimplemented(); +} + +frame frame::sender_for_entry_frame(RegisterMap *map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > _sp, "must be above this frame on stack"); + intptr_t* last_Java_sp = jfa->last_Java_sp(); + // Since we are walking the stack now this nested anchor is obviously walkable + // even if it wasn't when it was stacked. + if (!jfa->walkable()) { + // Capture _last_Java_pc (if needed) and mark anchor walkable. 
+ jfa->capture_last_Java_pc(_sp); + } + assert(jfa->last_Java_pc() != NULL, "No captured pc!"); + map->clear(); + map->make_integer_regs_unsaved(); + map->shift_window(last_Java_sp, NULL); + assert(map->include_argument_oops(), "should be set by clear"); + return frame(last_Java_sp, frame::unpatchable, jfa->last_Java_pc()); +} + +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + ShouldNotCallThis(); + return nullptr; +} + +bool frame::optimized_entry_frame_is_first() const { + ShouldNotCallThis(); + return false; +} + +frame frame::sender_for_interpreter_frame(RegisterMap *map) const { + ShouldNotCallThis(); + return sender(map); +} + +frame frame::sender_for_compiled_frame(RegisterMap *map) const { + ShouldNotCallThis(); + return sender(map); +} + +frame frame::sender(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + assert(CodeCache::find_blob_unsafe(_pc) == _cb, "inconsistent"); + + // Default is not to follow arguments; update it accordingly below + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + + intptr_t* younger_sp = sp(); + intptr_t* sp = sender_sp(); + + // Note: The version of this operation on any platform with callee-save + // registers must update the register map (if not null). + // In order to do this correctly, the various subtypes + // of frame (interpreted, compiled, glue, native), + // must be distinguished. There is no need on SPARC for + // such distinctions, because all callee-save registers are + // preserved for all frames via SPARC-specific mechanisms. + // + // *** HOWEVER, *** if and when we make any floating-point + // registers callee-saved, then we will have to copy over + // the RegisterMap update logic from the Intel code. + + // The constructor of the sender must know whether this frame is interpreted so it can set the + // sender's _sp_adjustment_by_callee field. 
An osr adapter frame was originally + // interpreted but its pc is in the code cache (for c1 -> osr_frame_return_id stub), so it must be + // explicitly recognized. + + + bool frame_is_interpreted = is_interpreted_frame(); + if (frame_is_interpreted) { + map->make_integer_regs_unsaved(); + map->shift_window(sp, younger_sp); + } else if (_cb != NULL) { + // Update the locations of implicitly saved registers to be their + // addresses in the register save area. + // For %o registers, the addresses of %i registers in the next younger + // frame are used. + map->shift_window(sp, younger_sp); + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + } + } + return frame(sp, younger_sp, frame_is_interpreted); +} + + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + vmassert(_deopt_state != unknown, "frame is unpatchable"); + if (thread == Thread::current()) { + StubRoutines::Sparc::flush_callers_register_windows_func()(); + } + if (TracePcPatching) { + // QQQ this assert is invalid (or too strong anyway) since _pc could + // be original pc and frame could have the deopt pc. 
+ // assert(_pc == *O7_addr() + pc_return_offset, "frame has wrong pc"); + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(O7_addr()), p2i(_pc), p2i(pc)); + } + *O7_addr() = pc - pc_return_offset; + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original to be stored before patching"); + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + + +static bool sp_is_valid(intptr_t* old_sp, intptr_t* young_sp, intptr_t* sp) { + return (((intptr_t)sp & (2*wordSize-1)) == 0 && + sp <= old_sp && + sp >= young_sp); +} + + +/* + Find the (biased) sp that is just younger than old_sp starting at sp. + If not found return NULL. Register windows are assumed to be flushed. +*/ +intptr_t* frame::next_younger_sp_or_null(intptr_t* old_sp, intptr_t* sp) { + + intptr_t* previous_sp = NULL; + intptr_t* orig_sp = sp; + + int max_frames = (old_sp - sp) / 16; // Minimum frame size is 16 + int max_frame2 = max_frames; // NOTE(review): max_frame2 is assigned here but never read in this function + while(sp != old_sp && sp_is_valid(old_sp, orig_sp, sp)) { + if (max_frames-- <= 0) + // too many frames have gone by; invalid parameters given to this function + break; + previous_sp = sp; + sp = (intptr_t*)sp[FP->sp_offset_in_saved_window()]; + sp = (intptr_t*)((intptr_t)sp + STACK_BIAS); + } + + return (sp == old_sp ? previous_sp : NULL); +} + +/* + Determine if "sp" is a valid stack pointer. "sp" is assumed to be younger than + "valid_sp". So if "sp" is valid itself then it should be possible to walk frames + from "sp" to "valid_sp". The assumption is that the register windows for the + thread stack in question are flushed. 
+*/ +bool frame::is_valid_stack_pointer(intptr_t* valid_sp, intptr_t* sp) { + return next_younger_sp_or_null(valid_sp, sp) != NULL; +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (2*wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (2*wordSize-1)) != 0) { + return false; + } + + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) return false; + + // stack frames shouldn't be much larger than max_stack elements + + if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + return false; + } + + // validate bci/bcp + + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate ConstantPoolCache* + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + if (MetaspaceObj::is_valid(cp) == false) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + return thread->is_in_stack_range_incl(locals, (address)fp()); +} + + +// Windows have been flushed on entry (but not marked). Capture the pc that +// is the return address to the frame that contains "sp" as its stack pointer. +// This pc resides in the callee of the frame corresponding to "sp". +// As a side effect we mark this JavaFrameAnchor as having flushed the windows. 
+// This side effect lets us mark stacked JavaFrameAnchors (stacked in the +// call_helper) as flushed when we have flushed the windows for the most +// recent (i.e. current) JavaFrameAnchor. This saves useless flushing calls +// and lets us find the pc just once rather than multiple times as it did +// in the bad old _post_Java_state days. +// +void JavaFrameAnchor::capture_last_Java_pc(intptr_t* sp) { + if (last_Java_sp() != NULL && last_Java_pc() == NULL) { + // try and find the sp just younger than _last_Java_sp + intptr_t* _post_Java_sp = frame::next_younger_sp_or_null(last_Java_sp(), sp); + // Really this should never fail otherwise VM call must have non-standard + // frame linkage (bad) or stack is not properly flushed (worse). + guarantee(_post_Java_sp != NULL, "bad stack!"); + _last_Java_pc = (address) _post_Java_sp[ I7->sp_offset_in_saved_window()] + frame::pc_return_offset; + + } + set_window_flushed(); +} + +void JavaFrameAnchor::make_walkable(JavaThread* thread) { + if (walkable()) return; + // We always flush in case the profiler wants it but we won't mark + // the windows as flushed unless we have a last_Java_frame + intptr_t* sp = StubRoutines::Sparc::flush_callers_register_windows_func()(); + if (last_Java_sp() != NULL ) { + capture_last_Java_pc(sp); + } +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + + intptr_t* LSP = (intptr_t*) sp()[Lentry_args->sp_offset_in_saved_window()]; + return &LSP[index+1]; +} + + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + if (method->is_native()) { + // Prior to notifying the runtime of the method_exit the possible result + // value is saved to l_scratch and d_scratch. 
+ + intptr_t* l_scratch = fp() + interpreter_frame_l_scratch_fp_offset; + intptr_t* d_scratch = fp() + interpreter_frame_d_scratch_fp_offset; + + address l_addr = (address)l_scratch; + // On 64-bit the result for 1/8/16/32-bit result types is in the other + // word half + l_addr += wordSize/2; + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + oop obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); + assert(Universe::is_in_heap_or_null(obj), "sanity check"); + *oop_result = obj; + break; + } + + case T_BOOLEAN : { jint* p = (jint*)l_addr; value_result->z = (jboolean)((*p) & 0x1); break; } + case T_BYTE : { jint* p = (jint*)l_addr; value_result->b = (jbyte)((*p) & 0xff); break; } + case T_CHAR : { jint* p = (jint*)l_addr; value_result->c = (jchar)((*p) & 0xffff); break; } + case T_SHORT : { jint* p = (jint*)l_addr; value_result->s = (jshort)((*p) & 0xffff); break; } + case T_INT : value_result->i = *(jint*)l_addr; break; + case T_LONG : value_result->j = *(jlong*)l_scratch; break; + case T_FLOAT : value_result->f = *(jfloat*)d_scratch; break; + case T_DOUBLE : value_result->d = *(jdouble*)d_scratch; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + } else { + intptr_t* tos_addr = interpreter_frame_tos_address(); + + switch(type) { + case T_OBJECT: + case T_ARRAY: { + oop obj = cast_to_oop(*tos_addr); + assert(Universe::is_in_heap_or_null(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : { jint* p = (jint*)tos_addr; value_result->z = (jboolean)((*p) & 0x1); break; } + case T_BYTE : { jint* p = (jint*)tos_addr; value_result->b = (jbyte)((*p) & 0xff); break; } + case T_CHAR : { jint* p = (jint*)tos_addr; value_result->c = (jchar)((*p) & 0xffff); break; } + case T_SHORT : { jint* p = (jint*)tos_addr; value_result->s = (jshort)((*p) & 0xffff); break; } + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : 
value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + }; + + return type; +} + +// Lesp pointer is one word lower than the top item on the stack. +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize) - 1; + return &interpreter_frame_tos_address()[index]; +} + + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + for (int w = 0; w < frame::register_save_words; w++) { + values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1); + } + + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp); + DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp); + DESCRIBE_FP_OFFSET(interpreter_frame_mirror); + DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp); + + // esp, according to Lesp (e.g. not depending on bci), if seems valid + intptr_t* esp = *interpreter_frame_esp_addr(); + if ((esp >= sp()) && (esp < fp())) { + values.describe(-1, esp, "*Lesp"); + } + } + + if (!is_compiled_frame()) { + if (frame::callee_aggregate_return_pointer_words != 0) { + values.describe(frame_no, sp() + frame::callee_aggregate_return_pointer_sp_offset, "callee_aggregate_return_pointer_word"); + } + for (int w = 0; w < frame::callee_register_argument_save_area_words; w++) { + values.describe(frame_no, sp() + frame::callee_register_argument_save_area_sp_offset + w, + err_msg("callee_register_argument_save_area_words %d", w)); + } + } +} + +#endif + +intptr_t *frame::initial_deoptimization_info() { + // unused... 
but returns fp() to minimize changes introduced by 7087445 + return fp(); +} diff -ur --new-file a/src/hotspot/cpu/sparc/frame_sparc.hpp b/src/hotspot/cpu/sparc/frame_sparc.hpp --- a/src/hotspot/cpu/sparc/frame_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/frame_sparc.hpp 2023-04-16 11:42:11.061708986 +0000 @@ -0,0 +1,245 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_FRAME_SPARC_HPP +#define CPU_SPARC_FRAME_SPARC_HPP + +#include "runtime/synchronizer.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, sp, younger_sp} + + +// Layout of asm interpreter frame: +// +// 0xfffffff +// ...... 
+// [last extra incoming arg, (local # Nargs > 6 ? Nargs-1 : undef)] +// .. Note: incoming args are copied to local frame area upon entry +// [first extra incoming arg, (local # Nargs > 6 ? 6 : undef)] +// [6 words for C-arg storage (unused)] Are this and next one really needed? +// [C-aggregate-word (unused)] Yes, if want extra params to be in same place as C convention +// [16 words for register saving] <--- FP +// [interpreter_frame_vm_locals ] (see below) + +// Note: Llocals is always double-word aligned +// [first local i.e. local # 0] <-- Llocals +// ... +// [last local, i.e. local # Nlocals-1] + +// [monitors ] +// .... +// [monitors ] <-- Lmonitors (same as Llocals + 6*4 if none) +// (must be double-word aligned because +// monitor element size is constrained to +// doubleword) +// +// <-- Lesp (points 1 past TOS) +// [bottom word used for stack ] +// ... +// [top word used for stack] (first word of stack is double-word aligned) + +// [space for outgoing args (conservatively allocated as max_stack - 6 + interpreter_frame_extra_outgoing_argument_words)] +// [6 words for C-arg storage] +// [C-aggregate-word (unused)] +// [16 words for register saving] <--- SP +// ... +// 0x0000000 +// +// The in registers and local registers are preserved in a block at SP. +// +// The first six in registers (I0..I5) hold the first six locals. +// The locals are used as follows: +// Lesp first free element of expression stack +// (which grows towards __higher__ addresses) +// Lbcp is set to address of bytecode to execute +// It may at times (during GC) be an index instead. 
+// Lmethod the method being interpreted +// Llocals the base pointer for accessing the locals array +// (lower-numbered locals have lower addresses) +// Lmonitors the base pointer for accessing active monitors +// Lcache a saved pointer to the method's constant pool cache +// +// +// When calling out to another method, +// G5_method is set to method to call, G5_inline_cache_klass may be set, +// parameters are put in O registers, and also extra parameters +// must be cleverly copied from the top of stack to the outgoing param area in the frame, + +// All frames: + + public: + + enum { + // normal return address is 2 words past PC + pc_return_offset = 2 * BytesPerInstWord, + + // size of each block, in order of increasing address: + register_save_words = 16, + callee_aggregate_return_pointer_words = 0, + callee_register_argument_save_area_words = 6, + // memory_parameter_words = , + + // offset of each block, in order of increasing address: + // (note: callee_register_argument_save_area_words == Assembler::n_register_parameters) + register_save_words_sp_offset = 0, + callee_aggregate_return_pointer_sp_offset = register_save_words_sp_offset + register_save_words, + callee_register_argument_save_area_sp_offset = callee_aggregate_return_pointer_sp_offset + callee_aggregate_return_pointer_words, + memory_parameter_word_sp_offset = callee_register_argument_save_area_sp_offset + callee_register_argument_save_area_words, + varargs_offset = memory_parameter_word_sp_offset + }; + + private: + intptr_t* _younger_sp; // optional SP of callee (used to locate O7) + int _sp_adjustment_by_callee; // adjustment in words to SP by callee for making locals contiguous + + // Note: On SPARC, unlike Intel, the saved PC for a stack frame + // is stored at a __variable__ distance from that frame's SP. + // (In fact, it may be in the register save area of the callee frame, + // but that fact need not bother us.) Thus, we must store the + // address of that saved PC explicitly. 
On the other hand, SPARC + // stores the FP for a frame at a fixed offset from the frame's SP, + // so there is no need for a separate "frame::_fp" field. + + public: + // Accessors + + intptr_t* younger_sp() const { + assert(_younger_sp != NULL, "frame must possess a younger_sp"); + return _younger_sp; + } + + int callee_sp_adjustment() const { return _sp_adjustment_by_callee; } + void set_sp_adjustment_by_callee(int number_of_words) { _sp_adjustment_by_callee = number_of_words; } + + // Constructors + + // This constructor relies on the fact that the creator of a frame + // has flushed register windows which the frame will refer to, and + // that those register windows will not be reloaded until the frame is + // done reading and writing the stack. Moreover, if the "younger_sp" + // argument points into the register save area of the next younger + // frame (though it need not), the register window for that next + // younger frame must also stay flushed. (The caller is responsible + // for ensuring this.) + + frame(intptr_t* sp, intptr_t* younger_sp, bool younger_frame_adjusted_stack = false); + + // make a deficient frame which doesn't know where its PC is: + enum unpatchable_t { unpatchable }; + frame(intptr_t* sp, unpatchable_t, address pc = NULL, CodeBlob* cb = NULL); + + void init(intptr_t* sp, address pc, CodeBlob* cb); + + // Walk from sp outward looking for old_sp, and return old_sp's predecessor + // (i.e. return the sp from the frame where old_sp is the fp). + // Register windows are assumed to be flushed for the stack in question. + + static intptr_t* next_younger_sp_or_null(intptr_t* old_sp, intptr_t* sp); + + // Return true if sp is a younger sp in the stack described by valid_sp. 
+ static bool is_valid_stack_pointer(intptr_t* valid_sp, intptr_t* sp); + + public: + // accessors for the instance variables + intptr_t* fp() const { return (intptr_t*) ((intptr_t)(sp()[FP->sp_offset_in_saved_window()]) + STACK_BIAS ); } + + // All frames + + intptr_t* fp_addr_at(int index) const { return &fp()[index]; } + intptr_t* sp_addr_at(int index) const { return &sp()[index]; } + intptr_t fp_at( int index) const { return *fp_addr_at(index); } + intptr_t sp_at( int index) const { return *sp_addr_at(index); } + + private: + inline address* I7_addr() const; + inline address* O7_addr() const; + + inline address* I0_addr() const; + inline address* O0_addr() const; + intptr_t* younger_sp_addr_at(int index) const { return &younger_sp()[index]; } + + public: + // access to SPARC arguments and argument registers + + // Assumes reg is an in/local register + intptr_t* register_addr(Register reg) const { + return sp_addr_at(reg->sp_offset_in_saved_window()); + } + + // Assumes reg is an out register + intptr_t* out_register_addr(Register reg) const { + return younger_sp_addr_at(reg->after_save()->sp_offset_in_saved_window()); + } + + + // Interpreter frames + + public: + // Asm interpreter + enum interpreter_frame_vm_locals { + // 2 words, also used to save float regs across calls to C + interpreter_frame_d_scratch_fp_offset = -2, + interpreter_frame_l_scratch_fp_offset = -4, + interpreter_frame_mirror_offset = -5, // keep interpreted method alive + + interpreter_frame_oop_temp_offset = -6, // for native calls only + interpreter_frame_vm_locals_fp_offset = -6, // should be same as above, and should be zero mod 8 + + interpreter_frame_vm_local_words = -interpreter_frame_vm_locals_fp_offset, + interpreter_frame_initial_sp_offset = interpreter_frame_vm_local_words, + + // interpreter frame set-up needs to save 2 extra words in outgoing param area + // for class and jnienv arguments for native stubs (see nativeStubGen_sparc.cpp) + + 
interpreter_frame_extra_outgoing_argument_words = 2 + }; + + enum compiler_frame_fixed_locals { + compiler_frame_vm_locals_fp_offset = -2 + }; + + private: + ConstantPoolCache** interpreter_frame_cpoolcache_addr() const; + + // where Lmonitors is saved: + inline BasicObjectLock** interpreter_frame_monitors_addr() const; + inline intptr_t** interpreter_frame_esp_addr() const; + + inline void interpreter_frame_set_tos_address(intptr_t* x); + + // monitors: + + // next two fns read and write Lmonitors value, + private: + BasicObjectLock* interpreter_frame_monitors() const; + void interpreter_frame_set_monitors(BasicObjectLock* monitors); + public: + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +#endif // CPU_SPARC_FRAME_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/frame_sparc.inline.hpp b/src/hotspot/cpu/sparc/frame_sparc.inline.hpp --- a/src/hotspot/cpu/sparc/frame_sparc.inline.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/frame_sparc.inline.hpp 2023-04-16 11:42:11.061858702 +0000 @@ -0,0 +1,194 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_FRAME_SPARC_INLINE_HPP +#define CPU_SPARC_FRAME_SPARC_INLINE_HPP + +#include "asm/macroAssembler.hpp" +#include "code/vmreg.inline.hpp" +#include "code/codeCache.hpp" +#include "utilities/align.hpp" + +// Inline functions for SPARC frames: + +// Constructors + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _younger_sp = NULL; + _cb = NULL; + _deopt_state = unknown; + _sp_adjustment_by_callee = 0; +} + +// Accessors: + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
+inline intptr_t* frame::id(void) const {
+  // The unextended sp serves as the frame's identity.
+  return unextended_sp();
+}
+
+// Return true if the frame is older (less recent activation) than the frame represented by id
+inline bool frame::is_older(intptr_t* id) const {
+  assert(this->id() != NULL && id != NULL, "NULL frame id");
+  return id < this->id();
+}
+
+// Frame size in words: the distance from this frame's sp up to the sender's sp.
+inline int frame::frame_size(RegisterMap* map) const {
+  intptr_t* const own_sp = sp();
+  return sender_sp() - own_sp;
+}
+
+// Reads the FP slot of the register window saved at fp() and removes the stack bias.
+inline intptr_t* frame::link() const {
+  const intptr_t biased_link = fp()[FP->sp_offset_in_saved_window()];
+  return (intptr_t*)(biased_link + STACK_BIAS);
+}
+
+// sp with the recorded callee adjustment (in words) applied.
+inline intptr_t* frame::unextended_sp() const {
+  return &sp()[_sp_adjustment_by_callee];
+}
+
+// return address:
+
+// The sender's pc is the saved I7 plus the fixed return offset.
+inline address frame::sender_pc() const {
+  const address saved_i7 = *I7_addr();
+  return saved_i7 + pc_return_offset;
+}
+
+// Slots of I7 / I0 in this frame's saved register window.
+inline address* frame::I7_addr() const {
+  return (address*) sp_addr_at(I7->sp_offset_in_saved_window());
+}
+inline address* frame::I0_addr() const {
+  return (address*) sp_addr_at(I0->sp_offset_in_saved_window());
+}
+
+// O7 / O0 are read from the I7 / I0 slots of the window saved at younger_sp().
+inline address* frame::O7_addr() const {
+  return (address*) younger_sp_addr_at(I7->sp_offset_in_saved_window());
+}
+inline address* frame::O0_addr() const {
+  return (address*) younger_sp_addr_at(I0->sp_offset_in_saved_window());
+}
+
+// The sender's sp is this frame's fp.
+inline intptr_t* frame::sender_sp() const {
+  return fp();
+}
+
+// Same as link() on this platform.
+inline intptr_t* frame::link_or_null() const {
+  return link();
+}
+
+// Same as fp() on this platform.
+inline intptr_t* frame::real_fp() const {
+  return fp();
+}
+
+// Slot of Llocals in the saved register window.
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  return (intptr_t**) register_addr(Llocals);
+}
+
+// Slot of Lbcp in the saved register window.
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
+  return (intptr_t*) register_addr(Lbcp);
+}
+
+// Slot of ImethodDataPtr in the saved register window.
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
+  // %%%%% reinterpreting ImethodDataPtr as a mdx
+  return (intptr_t*) register_addr(ImethodDataPtr);
+}
+
+// bottom(base) of the expression stack (highest address)
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  intptr_t* const monitors_as_words = (intptr_t*)interpreter_frame_monitors();
+  return monitors_as_words - 1;
+}
+
+// top of expression stack (lowest address) +inline intptr_t* frame::interpreter_frame_tos_address() const { + return *interpreter_frame_esp_addr() + 1; +} + +inline BasicObjectLock** frame::interpreter_frame_monitors_addr() const { + return (BasicObjectLock**) sp_addr_at(Lmonitors->sp_offset_in_saved_window()); +} +inline intptr_t** frame::interpreter_frame_esp_addr() const { + return (intptr_t**)sp_addr_at(Lesp->sp_offset_in_saved_window()); +} + +inline void frame::interpreter_frame_set_tos_address( intptr_t* x ) { + *interpreter_frame_esp_addr() = x - 1; +} + +// monitor elements + +// in keeping with Intel side: end is lower in memory than begin; +// and beginning element is oldest element +// Also begin is one past last monitor. + +inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + int rounded_vm_local_words = align_up((int)frame::interpreter_frame_vm_local_words, WordsPerLong); + return (BasicObjectLock *)fp_addr_at(-rounded_vm_local_words); +} + +inline BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return interpreter_frame_monitors(); +} + + +inline void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + interpreter_frame_set_monitors(value); +} + +inline int frame::interpreter_frame_monitor_size() { + return align_up(BasicObjectLock::size(), WordsPerLong); +} + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)sp_addr_at( Lmethod->sp_offset_in_saved_window()); +} + +inline BasicObjectLock* frame::interpreter_frame_monitors() const { + return *interpreter_frame_monitors_addr(); +} + +inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) { + *interpreter_frame_monitors_addr() = monitors; +} + +inline oop* frame::interpreter_frame_mirror_addr() const { + return (oop*)(fp() + interpreter_frame_mirror_offset); +} + +// Constant pool cache + +// where LcpoolCache is saved: +inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() 
const { + return (ConstantPoolCache**)sp_addr_at(LcpoolCache->sp_offset_in_saved_window()); + } + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)sp_addr_at( LcpoolCache->sp_offset_in_saved_window()); +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + // note: adjust this code if the link argument in StubGenerator::call_stub() changes! + const Argument link = Argument(0, false); + return (JavaCallWrapper**)&sp()[link.as_in().as_register()->sp_offset_in_saved_window()]; +} + + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(O0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(O0->as_VMReg())) = obj; +} + +#endif // CPU_SPARC_FRAME_SPARC_INLINE_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.cpp b/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.cpp --- a/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.cpp 2023-04-16 11:42:11.062189826 +0000 @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1DirtyCardQueue.hpp" +#include "gc/g1/g1SATBMarkQueueSet.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1/g1BarrierSetC1.hpp" +#endif + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + Register tmp = O5; + assert_different_registers(addr, count, tmp); + Label filtered; + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp); + } else { + guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp); + } + // Is marking active? 
+ __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); + + __ save_frame(0); + // Save the necessary global regs... will be used after. + if (addr->is_global()) { + __ mov(addr, L0); + } + if (count->is_global()) { + __ mov(count, L1); + } + __ mov(addr->after_save(), O0); + // Get the count into O1 + address slowpath = UseCompressedOops ? CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry) + : CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry); + __ call(slowpath); + __ delayed()->mov(count->after_save(), O1); + if (addr->is_global()) { + __ mov(L0, addr); + } + if (count->is_global()) { + __ mov(L1, count); + } + __ restore(); + + __ bind(filtered); + DEBUG_ONLY(__ set(0xDEADC0DE, tmp);) // we have killed tmp + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp) { + // Get some new fresh output registers. + __ save_frame(0); + __ mov(addr->after_save(), O0); + __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry)); + __ delayed()->mov(count->after_save(), O1); + __ restore(); +} + +#undef __ + +static address satb_log_enqueue_with_frame = NULL; +static u_char* satb_log_enqueue_with_frame_end = NULL; + +static address satb_log_enqueue_frameless = NULL; +static u_char* satb_log_enqueue_frameless_end = NULL; + +static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? + +static void generate_satb_log_enqueue(bool with_frame) { + BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); + CodeBuffer buf(bb); + MacroAssembler masm(&buf); + +#define __ masm. + + address start = __ pc(); + Register pre_val; + + Label refill, restart; + if (with_frame) { + __ save_frame(0); + pre_val = I0; // Was O0 before the save. 
+ } else { + pre_val = O0; + } + + int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); + int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); + + assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) && + in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t), + "check sizes in assembly below"); + + __ bind(restart); + + // Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t + // so ld_ptr is appropriate. + __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + __ sub(L0, oopSize, L0); + + __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + if (!with_frame) { + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } else { + // Not delayed. + __ st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } + if (with_frame) { + __ ret(); + __ delayed()->restore(); + } + __ bind(refill); + + address handle_zero = + CAST_FROM_FN_PTR(address, + &G1SATBMarkQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + __ mov(G1_scratch, L0); + __ mov(G3_scratch, L1); + __ mov(G4, L2); + // We need the value of O0 above (for the write into the buffer), so we + // save and restore it. + __ mov(O0, L3); + // Since the call will overwrite O7, we save and restore that, as well. 
+ __ mov(O7, L4); + __ call_VM_leaf(L5, handle_zero, G2_thread); + __ mov(L0, G1_scratch); + __ mov(L1, G3_scratch); + __ mov(L2, G4); + __ mov(L3, O0); + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->mov(L4, O7); + + if (with_frame) { + satb_log_enqueue_with_frame = start; + satb_log_enqueue_with_frame_end = __ pc(); + } else { + satb_log_enqueue_frameless = start; + satb_log_enqueue_frameless_end = __ pc(); + } + +#undef __ +} + +#define __ masm-> + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register index, + int offset, + Register pre_val, + Register tmp, + bool preserve_o_regs) { + Label filtered; + + if (obj == noreg) { + // We are not loading the previous value so make + // sure that we don't trash the value in pre_val + // with the code below. + assert_different_registers(pre_val, tmp); + } else { + // We will be loading the previous value + // in this code so... + assert(offset == 0 || index == noreg, "choose one"); + assert(pre_val == noreg, "check this code"); + } + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ld(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp); + } else { + guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldsb(G2, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), tmp); + } + + // Is marking active? + __ cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); + + // Do we need to load the previous value? + if (obj != noreg) { + // Load the previous value... 
+ if (index == noreg) { + if (Assembler::is_simm13(offset)) { + __ load_heap_oop(obj, offset, tmp); + } else { + __ set(offset, tmp); + __ load_heap_oop(obj, tmp, tmp); + } + } else { + __ load_heap_oop(obj, index, tmp); + } + // Previous value has been loaded into tmp + pre_val = tmp; + } + + assert(pre_val != noreg, "must have a real register"); + + // Is the previous value null? + __ cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered); + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there are some cases in + // which it's an O-reg. In the first case, do a normal call. In the + // latter, do a save here and call the frameless version. + + guarantee(pre_val->is_global() || pre_val->is_out(), + "Or we need to think harder."); + + if (pre_val->is_global() && !preserve_o_regs) { + __ call(satb_log_enqueue_with_frame); + __ delayed()->mov(pre_val, O0); + } else { + __ save_frame(0); + __ call(satb_log_enqueue_frameless); + __ delayed()->mov(pre_val->after_save(), O0); + __ restore(); + } + + __ bind(filtered); +} + +#undef __ + +static address dirty_card_log_enqueue = 0; +static u_char* dirty_card_log_enqueue_end = 0; + +// This gets to assume that o0 contains the object address. +static void generate_dirty_card_log_enqueue(CardTable::CardValue* byte_map_base) { + BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); + CodeBuffer buf(bb); + MacroAssembler masm(&buf); +#define __ masm. 
+ address start = __ pc(); + + Label not_already_dirty, restart, refill, young_card; + + __ srlx(O0, CardTable::card_shift, O0); + AddressLiteral addrlit(byte_map_base); + __ set(addrlit, O1); // O1 := + __ ldub(O0, O1, O2); // O2 := [O0 + O1] + + __ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card); + + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); + __ ldub(O0, O1, O2); // O2 := [O0 + O1] + + assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code"); + __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); + + __ bind(young_card); + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + __ retl(); + __ delayed()->nop(); + + // Not dirty. + __ bind(not_already_dirty); + + // Get O0 + O1 into a reg by itself + __ add(O0, O1, O3); + + // First, dirty it. + __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). + + int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); + int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); + __ bind(restart); + + // Load the index into the update buffer. G1DirtyCardQueue::_index is + // a size_t so ld_ptr is appropriate here. + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + __ sub(L0, oopSize, L0); + + __ st_ptr(O3, L1, L0); // [_buf + index] := I0 + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); + address handle_zero = + CAST_FROM_FN_PTR(address, + &G1DirtyCardQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. 
+ __ mov(G1_scratch, L3); + __ mov(G3_scratch, L5); + // We need the value of O3 above (for the write into the buffer), so we + // save and restore it. + __ mov(O3, L6); + // Since the call will overwrite O7, we save and restore that, as well. + __ mov(O7, L4); + + __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + __ mov(L3, G1_scratch); + __ mov(L5, G3_scratch); + __ mov(L6, O3); + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->mov(L4, O7); + + dirty_card_log_enqueue = start; + dirty_card_log_enqueue_end = __ pc(); + // XXX Should have a guarantee here about not going off the end! + // Does it already do so? Do an experiment... + +#undef __ + +} + +#define __ masm-> + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) { + Label filtered; + MacroAssembler* post_filter_masm = masm; + + if (new_val == G0) return; + + G1BarrierSet* bs = barrier_set_cast(BarrierSet::barrier_set()); + + __ xor3(store_addr, new_val, tmp); + __ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); + + __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); + + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! 
+ if (use_scr) { + post_filter_masm->mov(store_addr, scr); + } else { + post_filter_masm->nop(); + } + __ save_frame(0); + __ call(dirty_card_log_enqueue); + if (use_scr) { + __ delayed()->mov(scr, O0); + } else { + __ delayed()->mov(store_addr->after_save(), O0); + } + __ restore(); + + __ bind(filtered); +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register val, Address dst, Register tmp) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); + + bool needs_pre_barrier = as_normal; + // No need for post barrier if storing NULL + bool needs_post_barrier = val != G0 && in_heap; + + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + Register index = dst.has_index() ? dst.index() : noreg; + int disp = dst.has_disp() ? dst.disp() : 0; + + if (needs_pre_barrier) { + // Load and record the previous value. + g1_write_barrier_pre(masm, dst.base(), index, disp, + noreg /* pre_val */, + tmp, true /*preserve_o_regs*/); + } + + Register new_val = val; + if (needs_post_barrier) { + // G1 barrier needs uncompressed oop for region cross check. 
+ if (UseCompressedOops && val != G0) { + new_val = tmp; + __ mov(val, new_val); + } + } + + BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp); + + if (needs_post_barrier) { + Register base = dst.base(); + if (precise) { + if (!dst.has_index()) { + __ add(base, disp, base); + } else { + assert(!dst.has_disp(), "not supported yet"); + __ add(base, index, base); + } + } + g1_write_barrier_post(masm, base, new_val, tmp); + } +} + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address src, Register dst, Register tmp) { + bool on_oop = is_reference_type(type); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + // Load the value of the referent field. + ModRefBarrierSetAssembler::load_at(masm, decorators, type, src, dst, tmp); + if (on_oop && on_reference) { + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. Note with + // these parameters the pre-barrier does not generate + // the load of the previous value + + Register pre_val = dst; + bool saved = false; + if (pre_val->is_in()) { + // The g1_write_barrier_pre method assumes that the pre_val + // is not in an input register. 
+ __ save_frame_and_mov(0, pre_val, O0); + pre_val = O0; + saved = true; + } + + g1_write_barrier_pre(masm, noreg /* obj */, noreg /* index */, 0 /* offset */, + pre_val /* pre_val */, + tmp /* tmp */, + true /* preserve_o_regs */); + + if (saved) { + __ restore(); + } + } +} + +void G1BarrierSetAssembler::barrier_stubs_init() { + if (dirty_card_log_enqueue == 0) { + G1BarrierSet* bs = barrier_set_cast(BarrierSet::barrier_set()); + CardTable *ct = bs->card_table(); + generate_dirty_card_log_enqueue(ct->byte_map_base()); + assert(dirty_card_log_enqueue != 0, "postcondition."); + } + if (satb_log_enqueue_with_frame == 0) { + generate_satb_log_enqueue(true); + assert(satb_log_enqueue_with_frame != 0, "postcondition."); + } + if (satb_log_enqueue_frameless == 0) { + generate_satb_log_enqueue(false); + assert(satb_log_enqueue_frameless != 0, "postcondition."); + } +} + +#ifdef COMPILER1 + +#undef __ +#define __ ce->masm()-> + +void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. 
+ + __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + } + + if (__ is_in_wdisp16_range(*stub->continuation())) { + __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, *stub->continuation()); + } else { + __ cmp(pre_val_reg, G0); + __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation()); + } + __ delayed()->nop(); + + __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin()); + __ delayed()->mov(pre_val_reg, G4); + __ br(Assembler::always, false, Assembler::pt, *stub->continuation()); + __ delayed()->nop(); +} + +void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + + assert(stub->addr()->is_register(), "Precondition."); + assert(stub->new_val()->is_register(), "Precondition."); + Register addr_reg = stub->addr()->as_pointer_register(); + Register new_val_reg = stub->new_val()->as_register(); + + if (__ is_in_wdisp16_range(*stub->continuation())) { + __ br_null(new_val_reg, /*annul*/false, Assembler::pt, *stub->continuation()); + } else { + __ cmp(new_val_reg, G0); + __ brx(Assembler::equal, false, Assembler::pn, *stub->continuation()); + } + __ delayed()->nop(); + + __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin()); + __ delayed()->mov(addr_reg, G4); + __ br(Assembler::always, false, Assembler::pt, *stub->continuation()); + __ delayed()->nop(); +} + +#undef __ +#define __ sasm-> + +void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_pre_barrier", false); + + // G4: previous value of memory + + Register pre_val = G4; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + + 
Label refill, restart; + int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); + int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); + int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); + + // Is marking still active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ld(G2_thread, satb_q_active_byte_offset, tmp); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldsb(G2_thread, satb_q_active_byte_offset, tmp); + } + __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, restart); + __ retl(); + __ delayed()->nop(); + + __ bind(restart); + // Load the index into the SATB buffer. SATBMarkQueue::_index is a + // size_t so ld_ptr is appropriate + __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); + + // index == 0? + __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); + __ sub(tmp, oopSize, tmp); + + __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset); + + __ bind(refill); + + __ save_live_registers_no_oop_map(true); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + G1SATBMarkQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ restore_live_registers(true); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ epilogue(); +} + +void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + G1BarrierSet* bs = barrier_set_cast(BarrierSet::barrier_set()); + + Register addr = G4; + Register cardtable = G5; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + CardTable::CardValue* byte_map_base = bs->card_table()->byte_map_base(); + + Label not_already_dirty, restart, refill, 
young_card; + +#ifdef _LP64 + __ srlx(addr, CardTable::card_shift, addr); +#else + __ srl(addr, CardTable::card_shift, addr); +#endif + + AddressLiteral rs((address)byte_map_base); + __ set(rs, cardtable); // cardtable := + __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] + + __ cmp_and_br_short(tmp, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card); + + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); + __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] + + assert(G1CardTable::dirty_card_val() == 0, "otherwise check this code"); + __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); + + __ bind(young_card); + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + __ retl(); + __ delayed()->nop(); + + // Not dirty. + __ bind(not_already_dirty); + + // Get cardtable + tmp into a reg by itself + __ add(addr, cardtable, tmp2); + + // First, dirty it. + __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). + + Register tmp3 = cardtable; + Register tmp4 = tmp; + + // these registers are now dead + addr = cardtable = tmp = noreg; + + int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); + int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); + + __ bind(restart); + + // Get the index into the update buffer. G1DirtyCardQueue::_index is + // a size_t so ld_ptr is appropriate here. + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); + + // index == 0? 
+ __ cmp_and_brx_short(tmp3, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); + __ sub(tmp3, oopSize, tmp3); + + __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); + + __ save_live_registers_no_oop_map(true); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + G1DirtyCardQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ restore_live_registers(true); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 diff -ur --new-file a/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.hpp b/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.hpp --- a/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/gc/g1/g1BarrierSetAssembler_sparc.hpp 2023-04-16 11:42:11.062308621 +0000 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_GC_G1_G1BARRIERSETASSEMBLER_SPARC_HPP
+#define CPU_SPARC_GC_G1_G1BARRIERSETASSEMBLER_SPARC_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+#include "utilities/macros.hpp"
+
+class LIR_Assembler;  // forward declarations: these types are only used as pointer parameters below
+class StubAssembler;
+class G1PreBarrierStub;
+class G1PostBarrierStub;
+
+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {  // SPARC G1 layer on top of the ModRef assembler; declarations only
+protected:
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count);  // overrides the ModRefBarrierSetAssembler hook
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp);  // overrides the ModRefBarrierSetAssembler hook
+
+  void g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs);  // non-virtual helper; body not visible in this header
+  void g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp);  // non-virtual helper; body not visible in this header
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register val, Address dst, Register tmp);  // implements the pure virtual declared by ModRefBarrierSetAssembler
+
+public:
+#ifdef COMPILER1  // the four C1 entry points below exist only when C1 is built
+  void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
+  void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
+
+  void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+  void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+#endif
+
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Address src, Register dst, Register tmp);  // overrides BarrierSetAssembler::load_at
+  virtual void barrier_stubs_init();  // overrides the empty default in BarrierSetAssembler
+};
+
+#endif // CPU_SPARC_GC_G1_G1BARRIERSETASSEMBLER_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.cpp b/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.cpp
---
a/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.cpp 2023-04-16 11:42:11.062457898 +0000 @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "runtime/jniHandles.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Register val, Address dst, Register tmp) {  // emits a plain store of oop val to dst; no GC barrier is generated here
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;  // only consulted by the assert in the non-heap branch
+  bool is_not_null = (decorators & IS_NOT_NULL) != 0;  // selects the *_not_null encode variant
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (in_heap) {
+      if (dst.has_disp() && !Assembler::is_simm13(dst.disp())) {  // displacement fails the simm13 check
+        assert(!dst.has_index(), "not supported yet");
+        __ set(dst.disp(), tmp);  // materialize the displacement in tmp (tmp is clobbered)
+        dst = Address(dst.base(), tmp);  // rewrite dst as base + index register
+      }
+      if (UseCompressedOops) {
+        assert(dst.base() != val, "not enough registers");
+        if (is_not_null) {
+          __ encode_heap_oop_not_null(val);  // single-register form: presumably encodes val in place - confirm
+        } else {
+          __ encode_heap_oop(val);  // single-register form: presumably encodes val in place - confirm
+        }
+        __ st(val, dst);  // store the encoded value
+      } else {
+        __ st_ptr(val, dst);  // uncompressed: pointer-sized store
+      }
+    } else {
+      assert(in_native, "why else?");
+      __ st_ptr(val, dst);  // native location: pointer-sized store, never encoded
+    }
+    break;
+  }
+  default: Unimplemented();  // only T_OBJECT / T_ARRAY are handled by this function
+  }
+}
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Address src, Register dst, Register tmp) {  // emits a plain load of an oop from src into dst; no GC barrier is generated here
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;  // only consulted by the assert in the non-heap branch
+  bool is_not_null = (decorators & IS_NOT_NULL) != 0;  // selects the *_not_null decode variant
+
+  switch (type) {
+  case T_ARRAY:
+  case T_OBJECT: {
+    if (in_heap) {
+      if (src.has_disp() && !Assembler::is_simm13(src.disp())) {  // displacement fails the simm13 check
+        assert(!src.has_index(), "not supported yet");
+        __ set(src.disp(), tmp);  // materialize the displacement in tmp (tmp is clobbered)
+        src = Address(src.base(), tmp);  // rewrite src as base + index register
+      }
+      if (UseCompressedOops) {
+        __ lduw(src, dst);  // load the encoded value, then decode it in dst
+        if (is_not_null) {
+          __ decode_heap_oop_not_null(dst);
+        } else {
+          __ decode_heap_oop(dst);
+        }
+      } else {
+        __ ld_ptr(src, dst);  // uncompressed: pointer-sized load
+      }
+    } else {
+      assert(in_native, "why else?");
+      __ ld_ptr(src, dst);  // native location: pointer-sized load, never decoded
+    }
+    break;
+  }
+  default: Unimplemented();  // only T_OBJECT / T_ARRAY are handled by this function
+  }
+}
+
+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+                                                        Register obj, Register tmp, Label& slowpath) {  // jni_env, tmp and slowpath are not used by this base implementation
+  __ andn(obj, JNIHandles::weak_tag_mask, obj);  // clear the weak-tag bits of the handle
+  __ ld_ptr(obj, 0, obj);  // obj := word at offset 0 of the untagged handle
+}
diff -ur --new-file a/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.hpp b/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.hpp
--- a/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/gc/shared/barrierSetAssembler_sparc.hpp 2023-04-16 11:42:11.062579973 +0000
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_GC_SHARED_BARRIERSETASSEMBLER_SPARC_HPP
+#define CPU_SPARC_GC_SHARED_BARRIERSETASSEMBLER_SPARC_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "memory/allocation.hpp"
+#include "oops/access.hpp"
+
+class InterpreterMacroAssembler;
+
+class BarrierSetAssembler: public CHeapObj<mtGC> {  // root of the SPARC barrier assemblers; CHeapObj is a template and needs its memory-category argument
+public:
+  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register src, Register dst, Register count) {}  // default: emits nothing
+  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register src, Register dst, Register count) {}  // default: emits nothing
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Register src, Address dst, Register tmp);  // barrier-free store; the .cpp definition names the second register val
+
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Address src, Register dst, Register tmp);  // barrier-free load
+
+  // Support for jniFastGetField to try resolving a jobject/jweak in native
+  virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+                                             Register obj, Register tmp, Label& slowpath);
+
+  virtual void barrier_stubs_init() {}  // default: no stubs to generate
+};
+
+#endif // CPU_SPARC_GC_SHARED_BARRIERSETASSEMBLER_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/gc/shared/barrierSetNMethod_sparc.cpp b/src/hotspot/cpu/sparc/gc/shared/barrierSetNMethod_sparc.cpp
--- a/src/hotspot/cpu/sparc/gc/shared/barrierSetNMethod_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/gc/shared/barrierSetNMethod_sparc.cpp 2023-04-16 11:42:11.062713770 +0000
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
+#include "utilities/debug.hpp"
+
+void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {  // SPARC placeholder: both arguments are ignored
+  ShouldNotReachHere();  // the only statement: this hook has no SPARC implementation in this file
+}
+
+void BarrierSetNMethod::disarm(nmethod* nm) {  // SPARC placeholder: nm is ignored
+  ShouldNotReachHere();
+}
+
+bool BarrierSetNMethod::is_armed(nmethod* nm) {  // SPARC placeholder: nm is ignored
+  ShouldNotReachHere();
+  return false;  // presumably only present to satisfy the bool return type - confirm ShouldNotReachHere() semantics
+}
diff -ur --new-file a/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.cpp b/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.cpp
--- a/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.cpp 2023-04-16 11:42:11.062860623 +0000
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "gc/shared/cardTableBarrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+
+#define __ masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                                    Register addr, Register count, Register tmp) {  // emits a loop storing 0 into every card from addr's card to the last element's card; clobbers addr, count, tmp
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());  // target type must be explicit: it cannot be deduced from the argument
+  CardTable* ct = ctbs->card_table();
+  assert_different_registers(addr, count, tmp);
+
+  Label L_loop, L_done;
+
+  __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_done); // zero count - nothing to do
+
+  __ sll_ptr(count, LogBytesPerHeapOop, count);  // count := count << LogBytesPerHeapOop (element count to bytes)
+  __ sub(count, BytesPerHeapOop, count);  // step back one element
+  __ add(count, addr, count);  // count := address of the last element
+  // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
+  __ srl_ptr(addr, CardTable::card_shift, addr);  // addr := card index of the first element
+  __ srl_ptr(count, CardTable::card_shift, count);  // count := card index of the last element
+  __ sub(count, addr, count);  // count := last card index - first card index
+  AddressLiteral rs(ct->byte_map_base());
+  __ set(rs, tmp);  // tmp := card table byte_map_base
+  __ BIND(L_loop);
+  __ stb(G0, tmp, addr);  // card[addr] := 0 (G0 is the store source)
+  __ subcc(count, 1, count);  // one card fewer; sets the condition codes tested below
+  __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
+  __ delayed()->add(addr, 1, addr);  // delay slot: advance to the next card
+
+  __ BIND(L_done);
+}
+
+void CardTableBarrierSetAssembler::card_table_write(MacroAssembler* masm,
+                                                    CardTable::CardValue* byte_map_base,
+                                                    Register tmp, Register obj) {  // emits byte_map_base[obj >> card_shift] := 0; clobbers obj and tmp
+  __ srlx(obj, CardTable::card_shift, obj);  // obj := card index
+  assert(tmp != obj, "need separate temp reg");
+  __ set((address) byte_map_base, tmp);  // tmp := card table base
+  __ stb(G0, tmp, obj);  // store a zero byte at tmp + obj
+}
+
+void CardTableBarrierSetAssembler::card_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp) {  // marks the card of store_addr unless the stored value is the constant G0
+  // If we're writing constant NULL, we can skip the write barrier.
+  if (new_val == G0) return;
+  CardTableBarrierSet* bs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());  // target type must be explicit: it cannot be deduced from the argument
+  card_table_write(masm, bs->card_table()->byte_map_base(), tmp, store_addr);  // store_addr is clobbered by card_table_write
+}
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                Register val, Address dst, Register tmp) {  // barrier-free store via BarrierSetAssembler::store_at, then the card-marking post barrier when needed
+  bool in_heap = (decorators & IN_HEAP) != 0;
+
+  bool is_array = (decorators & IS_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = is_array || on_anonymous;  // precise: mark the card of the exact slot rather than of dst.base()
+
+  // No need for post barrier if storing NULL
+  bool needs_post_barrier = val != G0 && in_heap;
+
+  BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);
+  if (needs_post_barrier) {
+    Register base = dst.base();
+    if (precise) {
+      if (!dst.has_index()) {
+        __ add(base, dst.disp(), base);  // base := base + displacement (base register is modified)
+      } else {
+        assert(!dst.has_disp(), "not supported yet");
+        __ add(base, dst.index(), base);  // base := base + index (base register is modified)
+      }
+    }
+    card_write_barrier_post(masm, base, val, tmp);
+  }
+}
diff -ur --new-file
a/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.hpp b/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.hpp --- a/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/gc/shared/cardTableBarrierSetAssembler_sparc.hpp 2023-04-16 11:42:11.062989059 +0000 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_SPARC_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SPARC_HPP
+#define CPU_SPARC_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SPARC_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+
+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {  // SPARC card-marking layer; all members are protected
+protected:
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                Register addr, Register count, Register tmp);  // overrides the ModRef hook; the definition clobbers addr, count and tmp
+
+  void card_table_write(MacroAssembler* masm, CardTable::CardValue* byte_map_base, Register tmp, Register obj);  // the definition stores G0 at byte_map_base + (obj >> card_shift); clobbers tmp and obj
+
+  void card_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, Register tmp);  // the definition emits nothing when new_val == G0
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register val, Address dst, Register tmp);  // implements the pure virtual declared by ModRefBarrierSetAssembler
+};
+
+#endif // CPU_SPARC_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.cpp b/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.cpp
--- a/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.cpp 2023-04-16 11:42:11.063122861 +0000
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+
+#define __ masm->
+
+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                   Register src, Register dst, Register count) {  // emits the array pre-barrier for T_OBJECT copies; src is unused
+  if (type == T_OBJECT) {  // NOTE(review): store_at below tests is_reference_type(type); here only T_OBJECT is matched - confirm callers never pass T_ARRAY
+    bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
+    if (!checkcast) {
+      // save arguments for barrier generation
+      __ mov(dst, G1);  // G1 := dst; read again by arraycopy_epilogue
+      __ mov(count, G5);  // G5 := count; read again by arraycopy_epilogue
+      gen_write_ref_array_pre_barrier(masm, decorators, G1, G5);
+    } else {
+      gen_write_ref_array_pre_barrier(masm, decorators, dst, count);  // checkcast: operate on the caller's registers directly
+    }
+  }
+}
+
+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                   Register src, Register dst, Register count) {  // emits the array post-barrier for T_OBJECT copies; src is unused
+  if (type == T_OBJECT) {
+    bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
+    if (!checkcast) {
+      // O0 is used as temp register
+      gen_write_ref_array_post_barrier(masm, decorators, G1, G5, O0);  // G1/G5 are the copies made in arraycopy_prologue; dst and count are not read here
+    } else {
+      gen_write_ref_array_post_barrier(masm, decorators, dst, count, O3);  // checkcast: O3 is the temp register
+    }
+  }
+}
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Register val, Address dst, Register tmp) {  // routes reference-typed stores to oop_store_at, everything else to the base class
+  if (is_reference_type(type)) {
+    oop_store_at(masm, decorators, type, val, dst, tmp);  // pure virtual, supplied by the concrete barrier assembler
+  } else {
+    BarrierSetAssembler::store_at(masm, decorators, type, val, dst, tmp);
+  }
+}
diff -ur --new-file
a/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.hpp b/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.hpp --- a/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/gc/shared/modRefBarrierSetAssembler_sparc.hpp 2023-04-16 11:42:11.063252824 +0000 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_GC_SHARED_MODREFBARRIERSETASSEMBLER_SPARC_HPP +#define CPU_SPARC_GC_SHARED_MODREFBARRIERSETASSEMBLER_SPARC_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" + +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. 
+
+class ModRefBarrierSetAssembler: public BarrierSetAssembler {  // adds array pre/post barrier hooks and an oop_store_at hook on top of BarrierSetAssembler
+protected:
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                               Register addr, Register count) {}  // default: emits nothing
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                Register addr, Register count, Register tmp) {}  // default: emits nothing
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Register val, Address dst, Register tmp) = 0;  // pure virtual: every concrete subclass must provide the oop store
+public:
+  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register src, Register dst, Register count);  // overrides the empty default in BarrierSetAssembler
+  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register src, Register dst, Register count);  // overrides the empty default in BarrierSetAssembler
+
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Register val, Address dst, Register tmp);  // overrides BarrierSetAssembler::store_at
+};
+
+#endif // CPU_SPARC_GC_SHARED_MODREFBARRIERSETASSEMBLER_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp
--- a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp 2023-04-16 11:42:11.063380083 +0000
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_GLOBALDEFINITIONS_SPARC_HPP
+#define CPU_SPARC_GLOBALDEFINITIONS_SPARC_HPP
+
+// Size of Sparc Instructions
+const int BytesPerInstWord = 4;
+
+const int StackAlignmentInBytes = (2*wordSize);  // two machine words; wordSize is defined outside this file
+
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are extended to 64 bits.
+const bool CCallingConventionRequiresIntsAsLongs = true;
+
+#define SUPPORTS_NATIVE_CX8  // presence-only macro: defined with an empty value
+
+#define CPU_MULTI_COPY_ATOMIC  // presence-only macro: defined with an empty value
+
+// The expected size in bytes of a cache line, used to pad data structures.
+#if COMPILER1_AND_COMPILER2
+  // tiered, 64-bit, large machine
+  #define DEFAULT_CACHE_LINE_SIZE 128
+  #define OM_CACHE_LINE_SIZE 64
+#elif defined(COMPILER1)
+  // pure C1, 32-bit, small machine
+  #define DEFAULT_CACHE_LINE_SIZE 16
+#elif defined(COMPILER2)
+  // pure C2, 64-bit, large machine
+  #define DEFAULT_CACHE_LINE_SIZE 128
+  #define OM_CACHE_LINE_SIZE 64
+#endif  // no branch defines either macro when neither compiler is configured; the pure-C1 branch leaves OM_CACHE_LINE_SIZE undefined
+
+#if defined(SOLARIS)
+#define SUPPORT_RESERVED_STACK_AREA  // only defined for Solaris builds
+#endif
+
+#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS true
+
+#endif // CPU_SPARC_GLOBALDEFINITIONS_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/globals_sparc.hpp b/src/hotspot/cpu/sparc/globals_sparc.hpp
--- a/src/hotspot/cpu/sparc/globals_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/globals_sparc.hpp 2023-04-16 11:42:11.063551576 +0000
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates.
All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_GLOBALS_SPARC_HPP
+#define CPU_SPARC_GLOBALS_SPARC_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+// For sparc we do not do call backs when a thread is in the interpreter, because the
+// interpreter dispatch needs at least two instructions - first to load the dispatch address
+// in a register, and second to jmp. The swapping of the dispatch table may occur _after_
+// the load of the dispatch address and hence the jmp would still go to the location
+// according to the prior table. So, we let the thread continue and let it block by itself.
+define_pd_global(bool, DontYieldALot, true); // yield no more than 100 times per second
+
+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
+define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on sparc.
+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast
+
+define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment.
+define_pd_global(intx, CodeEntryAlignment, 32);
+// The default setting 16/16 seems to work best.
+// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
+define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize
+define_pd_global(intx, InlineFrequencyCount, 50); // we can use more inlining on the SPARC
+define_pd_global(intx, InlineSmallCode, 1500);
+
+#define DEFAULT_STACK_YELLOW_PAGES (2)
+#define DEFAULT_STACK_RED_PAGES (1)
+#define DEFAULT_STACK_RESERVED_PAGES (SOLARIS_ONLY(1) NOT_SOLARIS(0))  // 1 on Solaris, 0 elsewhere
+
+define_pd_global(intx, CompilerThreadStackSize, 1024);  // unit not stated here; presumably Kbytes - confirm against globals.hpp
+define_pd_global(intx, ThreadStackSize, 1024);  // unit not stated here; presumably Kbytes - confirm against globals.hpp
+define_pd_global(intx, VMThreadStackSize, 1024);  // unit not stated here; presumably Kbytes - confirm against globals.hpp
+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+2))  // two extra pages in debug builds
+
+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES  // the minimums for yellow/red/shadow equal the defaults above
+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
+#define MIN_STACK_RESERVED_PAGES (0)
+
+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES);
+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES);
+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES);
+
+define_pd_global(bool, RewriteBytecodes, true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+define_pd_global(bool, PreserveFramePointer, false);
+
+define_pd_global(uintx, TypeProfileLevel, 111);  // digit encoding is defined by the shared flag, not in this file
+
+define_pd_global(bool, CompactStrings, true);
+
+define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+
+#define ARCH_FLAGS(develop, \
+                   product, \
+                   notproduct, \
+                   range, \
+                   constraint) \
+ \
+  product(intx, UseVIS, 99, \
+          "Highest supported VIS instructions set on SPARC") \
+          range(0, 99) \
+ \
+  product(bool, UseCBCond, false, \
+          "Use compare and branch instruction on SPARC") \
+ \
+  product(bool, UseMPMUL, false, \
+          "Use multi-precision multiply instruction (mpmul) on SPARC") \
+ \
+  product(bool, UseBlockZeroing, false, \
+          "Use special cpu instructions for block zeroing") \
+ \
+  product(intx, BlockZeroingLowLimit, 2048, \
+          "Minimum size in bytes when block zeroing will be used") \
+          range(1, max_jint) \
+ \
+  product(bool, UseBlockCopy, false, \
+          "Use special cpu instructions for block copy") \
+ \
+  product(intx, BlockCopyLowLimit, 2048, \
+          "Minimum size in bytes when block copy will be used") \
+          range(1, max_jint) \
+ \
+  product(bool, UseNiagaraInstrs, false, \
+          "Use Niagara-efficient instruction subset") \
+ \
+  develop(bool, UseCASForSwap, false, \
+          "Do not use swap instructions, but only CAS (in a loop) on SPARC")\
+ \
+  product(uintx, ArraycopySrcPrefetchDistance, 0, \
+          "Distance to prefetch source array in arraycopy") \
+          constraint(ArraycopySrcPrefetchDistanceConstraintFunc, AfterErgo) \
+ \
+  product(uintx, ArraycopyDstPrefetchDistance, 0, \
+          "Distance to prefetch destination array in arraycopy") \
+          constraint(ArraycopyDstPrefetchDistanceConstraintFunc, AfterErgo) \
+
+#endif // CPU_SPARC_GLOBALS_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/icBuffer_sparc.cpp b/src/hotspot/cpu/sparc/icBuffer_sparc.cpp
--- a/src/hotspot/cpu/sparc/icBuffer_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/icBuffer_sparc.cpp 2023-04-16 11:42:11.063680808 +0000
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_sparc.hpp"
+#include "oops/oop.inline.hpp"
+
+int InlineCacheBuffer::ic_stub_code_size() {  // bytes reserved for one stub; used below to size the CodeBuffer
+  return (NativeMovConstReg::instruction_size + // sethi;add
+          NativeJump::instruction_size + // sethi; jmp; delay slot
+          (1*BytesPerInstWord) + 1); // flush + 1 extra byte
+}
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {  // writes the stub at code_begin: set cached_value, jump to entry_point, flush
+  ResourceMark rm;
+  CodeBuffer code(code_begin, ic_stub_code_size());
+  MacroAssembler* masm = new MacroAssembler(&code);  // not deleted in this function; presumably resource-allocated under rm - confirm
+  // note: even though the code contains an embedded metadata, we do not need reloc info
+  // because
+  // (1) the metadata is old (i.e., doesn't matter for scavenges)
+  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
+  AddressLiteral cached_value_addrlit((address)cached_value, relocInfo::none);
+  // Force the set to generate the fixed sequence so next_instruction_address works
+  masm->patchable_set(cached_value_addrlit, G5_inline_cache_reg);  // G5_inline_cache_reg := cached_value
+  assert(G3_scratch != G5_method, "Do not clobber the method oop in the transition stub");
+  assert(G3_scratch != G5_inline_cache_reg, "Do not clobber the inline cache register in the transition stub");
+  AddressLiteral entry(entry_point);
+  masm->JUMP(entry, G3_scratch, 0);  // jump to entry_point using G3_scratch as the temporary
+  masm->delayed()->nop();  // nothing useful to place in the delay slot
+  masm->flush();
+}
+
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {  // returns the destination of the jump that follows the set sequence
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object
+  NativeJump* jump = nativeJump_at(move->next_instruction_address());
+  return jump->jump_destination();
+}
+
+
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {  // returns the constant embedded in the set sequence
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object
+  NativeJump* jump = nativeJump_at(move->next_instruction_address());  // jump is never read; presumably kept for nativeJump_at's verification - confirm
+  void* o = (void*)move->data();
+  return o;
+}
diff -ur --new-file a/src/hotspot/cpu/sparc/icache_sparc.cpp b/src/hotspot/cpu/sparc/icache_sparc.cpp
--- a/src/hotspot/cpu/sparc/icache_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/icache_sparc.cpp 2023-04-16 11:42:11.063793154 +0000
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "runtime/icache.hpp" + +#define __ _masm-> + +void ICacheStubGenerator::generate_icache_flush( + ICache::flush_icache_stub_t* flush_icache_stub +) { + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + address start = __ pc(); + + Label L; + __ bind(L); + __ flush( O0, G0 ); + __ deccc( O1 ); + __ br(Assembler::positive, false, Assembler::pn, L); + __ delayed()->inc( O0, 8 ); + __ retl(false); + __ delayed()->mov( O2, O0 ); // handshake with caller to make sure it happened! + + // Must be set here so StubCodeMark destructor can call the flush stub. + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +}; + +#undef __ diff -ur --new-file a/src/hotspot/cpu/sparc/icache_sparc.hpp b/src/hotspot/cpu/sparc/icache_sparc.hpp --- a/src/hotspot/cpu/sparc/icache_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/icache_sparc.hpp 2023-04-16 11:42:11.063909602 +0000 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_ICACHE_SPARC_HPP +#define CPU_SPARC_ICACHE_SPARC_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. + + +class ICache : public AbstractICache { + public: + enum { + stub_size = 160, // Size of the icache flush stub in bytes + line_size = 8, // flush instruction affects a dword + log2_line_size = 3 // log2(line_size) + }; + + // Use default implementation +}; + +#endif // CPU_SPARC_ICACHE_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/interp_masm_sparc.cpp b/src/hotspot/cpu/sparc/interp_masm_sparc.cpp --- a/src/hotspot/cpu/sparc/interp_masm_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/interp_masm_sparc.cpp 2023-04-16 11:42:11.064797287 +0000 @@ -0,0 +1,2552 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interp_masm_sparc.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markWord.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/methodCounters.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/powerOfTwo.hpp" + +// Implementation of InterpreterMacroAssembler + +// This file specializes the assembler with interpreter-specific macros + +const Address InterpreterMacroAssembler::l_tmp(FP, (frame::interpreter_frame_l_scratch_fp_offset * wordSize) + STACK_BIAS); +const Address InterpreterMacroAssembler::d_tmp(FP, (frame::interpreter_frame_d_scratch_fp_offset * wordSize) + STACK_BIAS); + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + AddressLiteral al(entry); + jump_to(al, G3_scratch); + delayed()->nop(); +} + +void InterpreterMacroAssembler::compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta) { + 
// Note: this algorithm is also used by C1's OSR entry sequence. + // Any changes should also be applied to CodeEmitter::emit_osr_entry(). + assert_different_registers(args_size, locals_size); + // max_locals*2 for TAGS. Assumes that args_size has already been adjusted. + subcc(locals_size, args_size, delta);// extra space for non-arguments locals in words + // Use br/mov combination because it works on both V8 and V9 and is + // faster. + Label skip_move; + br(Assembler::negative, true, Assembler::pt, skip_move); + delayed()->mov(G0, delta); + bind(skip_move); + align_up(delta, WordsPerLong); // make multiple of 2 (SP must be 2-word aligned) + sll(delta, LogBytesPerWord, delta); // extra space for locals in bytes +} + +// Dispatch code executed in the prolog of a bytecode which does not do its +// own dispatch. The dispatch address is computed and placed in IdispatchAddress +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int bcp_incr) { + assert_not_delayed(); + ldub( Lbcp, bcp_incr, Lbyte_code); // load next bytecode + // dispatch table to use + AddressLiteral tbl(Interpreter::dispatch_table(state)); + sll(Lbyte_code, LogBytesPerWord, Lbyte_code); // multiply by wordSize + set(tbl, G3_scratch); // compute addr of table + ld_ptr(G3_scratch, Lbyte_code, IdispatchAddress); // get entry addr +} + + +// Dispatch code executed in the epilog of a bytecode which does not do its +// own dispatch. The dispatch address in IdispatchAddress is used for the +// dispatch. 
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int bcp_incr) { + assert_not_delayed(); + interp_verify_oop(Otos_i, state, __FILE__, __LINE__); + jmp( IdispatchAddress, 0 ); + if (bcp_incr != 0) delayed()->inc(Lbcp, bcp_incr); + else delayed()->nop(); +} + +void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr, bool generate_poll) { + // %%%% consider branching to a single shared dispatch stub (for each bcp_incr) + assert_not_delayed(); + ldub( Lbcp, bcp_incr, Lbyte_code); // load next bytecode + dispatch_Lbyte_code(state, Interpreter::dispatch_table(state), bcp_incr, true, generate_poll); +} + + +void InterpreterMacroAssembler::dispatch_next_noverify_oop(TosState state, int bcp_incr) { + // %%%% consider branching to a single shared dispatch stub (for each bcp_incr) + assert_not_delayed(); + ldub( Lbcp, bcp_incr, Lbyte_code); // load next bytecode + dispatch_Lbyte_code(state, Interpreter::dispatch_table(state), bcp_incr, false); +} + + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + assert_not_delayed(); + ldub( Lbcp, 0, Lbyte_code); // load next bytecode + dispatch_base(state, table); +} + + +void InterpreterMacroAssembler::call_VM_leaf_base( + Register java_thread, + address entry_point, + int number_of_arguments +) { + if (!java_thread->is_valid()) + java_thread = L7_thread_cache; + // super call + MacroAssembler::call_VM_leaf_base(java_thread, entry_point, number_of_arguments); +} + + +void InterpreterMacroAssembler::call_VM_base( + Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exception +) { + if (!java_thread->is_valid()) + java_thread = L7_thread_cache; + // See class ThreadInVMfromInterpreter, which assumes that the interpreter + // takes responsibility for setting its own thread-state on call-out. + // However, ThreadInVMfromInterpreter resets the state to "in_Java". 
+ + //save_bcp(); // save bcp + MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, entry_point, number_of_arguments, check_exception); + //restore_bcp(); // restore bcp + //restore_locals(); // restore locals pointer +} + + +void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) { + if (JvmtiExport::can_pop_frame()) { + Label L; + + // Check the "pending popframe condition" flag in the current thread + ld(G2_thread, JavaThread::popframe_condition_offset(), scratch_reg); + + // Initiate popframe handling only if it is not already being processed. If the flag + // has the popframe_processing bit set, it means that this code is called *during* popframe + // handling - we don't want to reenter. + btst(JavaThread::popframe_pending_bit, scratch_reg); + br(zero, false, pt, L); + delayed()->nop(); + btst(JavaThread::popframe_processing_bit, scratch_reg); + br(notZero, false, pt, L); + delayed()->nop(); + + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. 
+ call_VM_leaf(noreg, CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + + // Jump to Interpreter::_remove_activation_preserving_args_entry + jmpl(O0, G0, G0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + Register thr_state = G4_scratch; + ld_ptr(G2_thread, JavaThread::jvmti_thread_state_offset(), thr_state); + const Address tos_addr(thr_state, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(thr_state, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(thr_state, JvmtiThreadState::earlyret_value_offset()); + switch (state) { + case ltos: ld_long(val_addr, Otos_l); break; + case atos: ld_ptr(oop_addr, Otos_l); + st_ptr(G0, oop_addr); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: ld(val_addr, Otos_l1); break; + case ftos: ldf(FloatRegisterImpl::S, val_addr, Ftos_f); break; + case dtos: ldf(FloatRegisterImpl::D, val_addr, Ftos_d); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the jvmti thread state + or3(G0, ilgl, G3_scratch); + stw(G3_scratch, tos_addr); + st_long(G0, val_addr); + interp_verify_oop(Otos_i, state, __FILE__, __LINE__); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register scratch_reg) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register thr_state = G3_scratch; + ld_ptr(G2_thread, JavaThread::jvmti_thread_state_offset(), thr_state); + br_null_short(thr_state, pt, L); // if (thread->jvmti_thread_state() == NULL) exit; + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. 
+ ld(thr_state, JvmtiThreadState::earlyret_state_offset(), G4_scratch); + cmp_and_br_short(G4_scratch, JvmtiThreadState::earlyret_pending, Assembler::notEqual, pt, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code + ld(thr_state, JvmtiThreadState::earlyret_tos_offset(), Otos_l1); + call_VM_leaf(noreg, CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), Otos_l1); + + // Jump to Interpreter::_remove_activation_early_entry + jmpl(O0, G0, G0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::super_call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2) { + mov(arg_1, O0); + mov(arg_2, O1); + MacroAssembler::call_VM_leaf_base(thread_cache, entry_point, 2); +} + +void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) { + assert_not_delayed(); + dispatch_Lbyte_code(state, table); +} + + +void InterpreterMacroAssembler::dispatch_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, Interpreter::dispatch_table(state)); +} + + +// common code to dispatch and dispatch_only +// dispatch value in Lbyte_code and increment Lbcp + +void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, address* table, int bcp_incr, bool verify, bool generate_poll) { + // %%%%% maybe implement +VerifyActivationFrameSize here + //verify_thread(); //too slow; we will just verify on method entry & exit + if (verify) interp_verify_oop(Otos_i, state, __FILE__, __LINE__); + // dispatch table to use + AddressLiteral tbl(table); + Label dispatch; + + if (generate_poll) { + AddressLiteral sfpt_tbl(Interpreter::safept_table(state)); + Label no_safepoint; + + if (tbl.value() != sfpt_tbl.value()) { + ldx(Address(G2_thread, JavaThread::polling_page_offset()), G3_scratch, 0); + // 
Armed page has poll_bit set, if poll bit is cleared just continue. + and3(G3_scratch, SafepointMechanism::poll_bit(), G3_scratch); + + br_null_short(G3_scratch, Assembler::pt, no_safepoint); + set(sfpt_tbl, G3_scratch); + ba_short(dispatch); + } + bind(no_safepoint); + } + + set(tbl, G3_scratch); // compute addr of table + bind(dispatch); + sll(Lbyte_code, LogBytesPerWord, Lbyte_code); // multiply by wordSize + ld_ptr(G3_scratch, Lbyte_code, G3_scratch); // get entry addr + jmp( G3_scratch, 0 ); + if (bcp_incr != 0) delayed()->inc(Lbcp, bcp_incr); + else delayed()->nop(); +} + + +// Helpers for expression stack + +// Longs and doubles are Category 2 computational types in the +// JVM specification (section 3.11.1) and take 2 expression stack or +// local slots. +// Aligning them on 32 bit with tagged stacks is hard because the code generated +// for the dup* bytecodes depends on what types are already on the stack. +// If the types are split into the two stack/local slots, that is much easier +// (and we can use 0 for non-reference tags). 
+ +// Emits one ldf of FloatRegisterImpl::D width that loads from r1 at offset into d. Known good alignment in _LP64 but unknown otherwise +void InterpreterMacroAssembler::load_unaligned_double(Register r1, int offset, FloatRegister d) { + assert_not_delayed(); + + ldf(FloatRegisterImpl::D, r1, offset, d); +} + +// Emits one stf of FloatRegisterImpl::D width that stores d to r1 at offset. Known good alignment in _LP64 but unknown otherwise +void InterpreterMacroAssembler::store_unaligned_double(FloatRegister d, Register r1, int offset) { + assert_not_delayed(); + + stf(FloatRegisterImpl::D, d, r1, offset); + // inside debug_only: also store G0 one stackElementSize past offset (original note: store something more useful here) + debug_only(stx(G0, r1, offset+Interpreter::stackElementSize);) +} + + +// Emits one ldx that loads from r1 at offset into rd. Known good alignment in _LP64 but unknown otherwise +void InterpreterMacroAssembler::load_unaligned_long(Register r1, int offset, Register rd) { + assert_not_delayed(); + ldx(r1, offset, rd); +} + +// Emits one stx that stores l to r1 at offset. Known good alignment in _LP64 but unknown otherwise +void InterpreterMacroAssembler::store_unaligned_long(Register l, Register r1, int offset) { + assert_not_delayed(); + + stx(l, r1, offset); + // also store G0 one stackElementSize past offset; unlike store_unaligned_double this store is not wrapped in debug_only (original note: store something more useful here) + stx(G0, r1, offset+Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { // pop one element: ld into r, then advance Lesp + assert_not_delayed(); + ld(Lesp, Interpreter::expr_offset_in_bytes(0), r); // read expression-stack element 0 + inc(Lesp, Interpreter::stackElementSize); // advance Lesp by one stack element + debug_only(verify_esp(Lesp)); // check the new Lesp via verify_esp +} + +void InterpreterMacroAssembler::pop_ptr(Register r, Register scratch) { // pop one element with ld_ptr; scratch is not used in this body + assert_not_delayed(); + ld_ptr(Lesp, Interpreter::expr_offset_in_bytes(0), r); // read expression-stack element 0 + inc(Lesp, Interpreter::stackElementSize); // advance Lesp by one stack element + debug_only(verify_esp(Lesp)); +} + +void InterpreterMacroAssembler::pop_l(Register r) { // pop a long via load_unaligned_long + assert_not_delayed(); + load_unaligned_long(Lesp, Interpreter::expr_offset_in_bytes(0), r); + inc(Lesp, 2*Interpreter::stackElementSize); // advance Lesp by two stack elements + debug_only(verify_esp(Lesp)); +} + + +void InterpreterMacroAssembler::pop_f(FloatRegister f, Register scratch) { // pop one element with a FloatRegisterImpl::S ldf; scratch is not used in this body + assert_not_delayed(); + ldf(FloatRegisterImpl::S, Lesp, Interpreter::expr_offset_in_bytes(0), f); + inc(Lesp, Interpreter::stackElementSize); // advance Lesp by one stack element + debug_only(verify_esp(Lesp)); +} + + 
+void InterpreterMacroAssembler::pop_d(FloatRegister f, Register scratch) { + assert_not_delayed(); + load_unaligned_double(Lesp, Interpreter::expr_offset_in_bytes(0), f); + inc(Lesp, 2*Interpreter::stackElementSize); + debug_only(verify_esp(Lesp)); +} + + +void InterpreterMacroAssembler::push_i(Register r) { + assert_not_delayed(); + debug_only(verify_esp(Lesp)); + st(r, Lesp, 0); + dec(Lesp, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + assert_not_delayed(); + st_ptr(r, Lesp, 0); + dec(Lesp, Interpreter::stackElementSize); +} + +// remember: our convention for longs in SPARC is: +// O0 (Otos_l1) has high-order part in first word, +// O1 (Otos_l2) has low-order part in second word + +void InterpreterMacroAssembler::push_l(Register r) { + assert_not_delayed(); + debug_only(verify_esp(Lesp)); + // Longs are stored in memory-correct order, even if unaligned. + int offset = -Interpreter::stackElementSize; + store_unaligned_long(r, Lesp, offset); + dec(Lesp, 2 * Interpreter::stackElementSize); +} + + +void InterpreterMacroAssembler::push_f(FloatRegister f) { + assert_not_delayed(); + debug_only(verify_esp(Lesp)); + stf(FloatRegisterImpl::S, f, Lesp, 0); + dec(Lesp, Interpreter::stackElementSize); +} + + +void InterpreterMacroAssembler::push_d(FloatRegister d) { + assert_not_delayed(); + debug_only(verify_esp(Lesp)); + // Longs are stored in memory-correct order, even if unaligned. 
+ int offset = -Interpreter::stackElementSize; + store_unaligned_double(d, Lesp, offset); + dec(Lesp, 2 * Interpreter::stackElementSize); +} + + +void InterpreterMacroAssembler::push(TosState state) { + interp_verify_oop(Otos_i, state, __FILE__, __LINE__); + switch (state) { + case atos: push_ptr(); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + interp_verify_oop(Otos_i, state, __FILE__, __LINE__); +} + + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld_ptr(Lesp, Interpreter::expr_offset_in_bytes(n), val); +} +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + st_ptr(val, Lesp, Interpreter::expr_offset_in_bytes(n)); +} + + +void InterpreterMacroAssembler::load_receiver(Register param_count, + Register recv) { + sll(param_count, Interpreter::logStackElementSize, param_count); + ld_ptr(Lesp, param_count, recv); // gets receiver oop +} + +void InterpreterMacroAssembler::empty_expression_stack() { + // Reset Lesp. + sub( Lmonitors, wordSize, Lesp ); + + // Reset SP by subtracting more space from Lesp. + Label done; + assert(G4_scratch != Gframe_size, "Only you can prevent register aliasing!"); + + // A native does not need to do this, since its callee does not change SP. 
+ ld(Lmethod, Method::access_flags_offset(), Gframe_size); // Load access flags. + btst(JVM_ACC_NATIVE, Gframe_size); + br(Assembler::notZero, false, Assembler::pt, done); + delayed()->nop(); + + // Compute max expression stack+register save area + ld_ptr(Lmethod, in_bytes(Method::const_offset()), Gframe_size); + lduh(Gframe_size, in_bytes(ConstMethod::max_stack_offset()), Gframe_size); // Load max stack. + add(Gframe_size, frame::memory_parameter_word_sp_offset+Method::extra_stack_entries(), Gframe_size ); + + // + // now set up a stack frame with the size computed above + // + //round_to( Gframe_size, WordsPerLong ); // -- moved down to the "and" below + sll( Gframe_size, LogBytesPerWord, Gframe_size ); + sub( Lesp, Gframe_size, Gframe_size ); + and3( Gframe_size, -(2 * wordSize), Gframe_size ); // align SP (downwards) to an 8/16-byte boundary + debug_only(verify_sp(Gframe_size, G4_scratch)); + sub(Gframe_size, STACK_BIAS, Gframe_size ); + mov(Gframe_size, SP); + + bind(done); +} + + +#ifdef ASSERT +void InterpreterMacroAssembler::verify_sp(Register Rsp, Register Rtemp) { + Label Bad, OK; + + // Saved SP must be aligned. + btst(2*BytesPerWord-1, Rsp); + br(Assembler::notZero, false, Assembler::pn, Bad); + delayed()->nop(); + + // Saved SP, plus register window size, must not be above FP. + add(Rsp, frame::register_save_words * wordSize, Rtemp); + sub(Rtemp, STACK_BIAS, Rtemp); // Bias Rtemp before cmp to FP + cmp_and_brx_short(Rtemp, FP, Assembler::greaterUnsigned, Assembler::pn, Bad); + + // Saved SP must not be ridiculously below current SP. 
+ size_t maxstack = MAX2(JavaThread::stack_size_at_create(), (size_t) 4*K*K); + set(maxstack, Rtemp); + sub(SP, Rtemp, Rtemp); + add(Rtemp, STACK_BIAS, Rtemp); // Unbias Rtemp before cmp to Rsp + cmp_and_brx_short(Rsp, Rtemp, Assembler::lessUnsigned, Assembler::pn, Bad); + + ba_short(OK); + + bind(Bad); + stop("on return to interpreted call, restored SP is corrupted"); + + bind(OK); +} + + +void InterpreterMacroAssembler::verify_esp(Register Resp) { + // about to read or write Resp[0] + // make sure it is not in the monitors or the register save area + Label OK1, OK2; + + cmp(Resp, Lmonitors); + brx(Assembler::lessUnsigned, true, Assembler::pt, OK1); + delayed()->sub(Resp, frame::memory_parameter_word_sp_offset * wordSize, Resp); + stop("too many pops: Lesp points into monitor area"); + bind(OK1); + sub(Resp, STACK_BIAS, Resp); + cmp(Resp, SP); + brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, OK2); + delayed()->add(Resp, STACK_BIAS + frame::memory_parameter_word_sp_offset * wordSize, Resp); + stop("too many pushes: Lesp points into register window"); + bind(OK2); +} +#endif // ASSERT + +// Load compiled (i2c) or interpreter entry when calling from interpreted and +// do the call. Centralized so that all interpreter calls will do the same actions. +// If jvmti single stepping is on for a thread we must not call compiled code. +void InterpreterMacroAssembler::call_from_interpreter(Register target, Register scratch, Register Rret) { + + // Assume we want to go compiled if available + + ld_ptr(G5_method, in_bytes(Method::from_interpreted_offset()), target); + + if (JvmtiExport::can_post_interpreter_events()) { + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. 
+ verify_thread(); + Label skip_compiled_code; + + const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset()); + ld(interp_only, scratch); + cmp_zero_and_br(Assembler::notZero, scratch, skip_compiled_code, true, Assembler::pn); + delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), target); + bind(skip_compiled_code); + } + + // the i2c_adapters need Method* in G5_method (right? %%%) + // do the call +#ifdef ASSERT + { + Label ok; + br_notnull_short(target, Assembler::pt, ok); + stop("null entry point"); + bind(ok); + } +#endif // ASSERT + + // Adjust Rret first so Llast_SP can be same as Rret + add(Rret, -frame::pc_return_offset, O7); + add(Lesp, BytesPerWord, Gargs); // setup parameter pointer + // Record SP so we can remove any stack space allocated by adapter transition + jmp(target, 0); + delayed()->mov(SP, Llast_SP); +} + +void InterpreterMacroAssembler::if_cmp(Condition cc, bool ptr_compare) { + assert_not_delayed(); + + Label not_taken; + if (ptr_compare) brx(cc, false, Assembler::pn, not_taken); + else br (cc, false, Assembler::pn, not_taken); + delayed()->nop(); + + TemplateTable::branch(false,false); + + bind(not_taken); + + profile_not_taken_branch(G3_scratch); +} + + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp( + int bcp_offset, + Register Rtmp, + Register Rdst, + signedOrNot is_signed, + setCCOrNot should_set_CC ) { + assert(Rtmp != Rdst, "need separate temp register"); + assert_not_delayed(); + switch (is_signed) { + default: ShouldNotReachHere(); + + case Signed: ldsb( Lbcp, bcp_offset, Rdst ); break; // high byte + case Unsigned: ldub( Lbcp, bcp_offset, Rdst ); break; // high byte + } + ldub( Lbcp, bcp_offset + 1, Rtmp ); // low byte + sll( Rdst, BitsPerByte, Rdst); + switch (should_set_CC ) { + default: ShouldNotReachHere(); + + case set_CC: orcc( Rdst, Rtmp, Rdst ); break; + case dont_set_CC: or3( Rdst, Rtmp, Rdst ); break; + } +} + + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp( 
+ int bcp_offset, + Register Rtmp, + Register Rdst, + setCCOrNot should_set_CC ) { + assert(Rtmp != Rdst, "need separate temp register"); + assert_not_delayed(); + add( Lbcp, bcp_offset, Rtmp); + andcc( Rtmp, 3, G0); + Label aligned; + switch (should_set_CC ) { + default: ShouldNotReachHere(); + + case set_CC: break; + case dont_set_CC: break; + } + + br(Assembler::zero, true, Assembler::pn, aligned); + delayed()->ldsw(Rtmp, 0, Rdst); + + ldub(Lbcp, bcp_offset + 3, Rdst); + ldub(Lbcp, bcp_offset + 2, Rtmp); sll(Rtmp, 8, Rtmp); or3(Rtmp, Rdst, Rdst); + ldub(Lbcp, bcp_offset + 1, Rtmp); sll(Rtmp, 16, Rtmp); or3(Rtmp, Rdst, Rdst); + ldsb(Lbcp, bcp_offset + 0, Rtmp); sll(Rtmp, 24, Rtmp); + or3(Rtmp, Rdst, Rdst ); + + bind(aligned); + if (should_set_CC == set_CC) tst(Rdst); +} + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register temp, Register index, + int bcp_offset, size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(bcp_offset, temp, index, Unsigned); + } else if (index_size == sizeof(u4)) { + get_4_byte_integer_at_bcp(bcp_offset, temp, index); + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + xor3(index, -1, index); // convert to plain index + } else if (index_size == sizeof(u1)) { + ldub(Lbcp, bcp_offset, index); + } else { + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register tmp, + int bcp_offset, size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert_different_registers(cache, tmp); + assert_not_delayed(); + get_cache_index_at_bcp(cache, tmp, bcp_offset, index_size); + // convert from field index to ConstantPoolCacheEntry index and from + // word index to byte offset + sll(tmp, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord), tmp); + add(LcpoolCache, tmp, cache); +} + + +void 
InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register temp, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, temp, bcp_offset, index_size); + ld_ptr(cache, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset(), bytecode); + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + srl(bytecode, shift_count, bytecode); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + and3(bytecode, ConstantPoolCacheEntry::bytecode_1_mask, bytecode); +} + + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp, + int bcp_offset, size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert_different_registers(cache, tmp); + assert_not_delayed(); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(bcp_offset, cache, tmp, Unsigned); + } else { + ShouldNotReachHere(); // other sizes not supported here + } + // convert from field index to ConstantPoolCacheEntry index + // and from word index to byte offset + sll(tmp, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord), tmp); + // skip past the header + add(tmp, in_bytes(ConstantPoolCache::base_offset()), tmp); + // construct pointer to cache entry + add(LcpoolCache, tmp, cache); +} + + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index, Register tmp) { + assert_different_registers(result, index, tmp); + assert_not_delayed(); + // convert from field index to resolved_references() index and from + // word index to byte 
offset. Since this is a java object, it can be compressed + sll(index, LogBytesPerHeapOop, index); + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ld_ptr(result, ConstantPool::cache_offset_in_bytes(), result); + ld_ptr(result, ConstantPoolCache::resolved_references_offset_in_bytes(), result); + resolve_oop_handle(result, tmp); + // Add in the index + add(result, index, result); + load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, tmp); + // The resulting oop is null if the reference is not yet resolved. + // It is Universe::the_null_sentinel() if the reference resolved to NULL via condy. +} + + +// load cpool->resolved_klass_at(index) +void InterpreterMacroAssembler::load_resolved_klass_at_offset(Register Rcpool, + Register Roffset, Register Rklass) { + // int value = *this_cp->int_at_addr(which); + // int resolved_klass_index = extract_low_short_from_int(value); + // + // Because SPARC is big-endian, the low_short is at (cpool->int_at_addr(which) + 2 bytes) + add(Roffset, Rcpool, Roffset); + lduh(Roffset, sizeof(ConstantPool) + 2, Roffset); // Roffset = resolved_klass_index + + Register Rresolved_klasses = Rklass; + ld_ptr(Rcpool, ConstantPool::resolved_klasses_offset_in_bytes(), Rresolved_klasses); + sll(Roffset, LogBytesPerWord, Roffset); + add(Roffset, Array<Klass*>::base_offset_in_bytes(), Roffset); + ld_ptr(Rresolved_klasses, Roffset, Rklass); +} + + +// Generate a subtype check: branch to ok_is_subtype if sub_klass is +// a subtype of super_klass. Blows registers Rsuper_klass, Rsub_klass, tmp1, tmp2. +void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Register Rsuper_klass, + Register Rtmp1, + Register Rtmp2, + Register Rtmp3, + Label &ok_is_subtype ) { + Label not_subtype; + + // Profile the not-null value's klass.
+ profile_typecheck(Rsub_klass, Rtmp1); + + check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass, + Rtmp1, Rtmp2, + &ok_is_subtype, &not_subtype, NULL); + + check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass, + Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg, + &ok_is_subtype, NULL); + + bind(not_subtype); + profile_typecheck_failed(Rtmp1); +} + +// Separate these two to allow for delay slot in middle +// These are used to do a test and full jump to exception-throwing code. + +// %%%%% Could possibly reoptimize this by testing to see if could use +// a single conditional branch (i.e. if span is small enough). +// If you go that route, then get rid of the split and give up +// on the delay-slot hack. + +void InterpreterMacroAssembler::throw_if_not_1_icc( Condition ok_condition, + Label& ok ) { + assert_not_delayed(); + br(ok_condition, true, pt, ok); + // DELAY SLOT +} + +void InterpreterMacroAssembler::throw_if_not_1_xcc( Condition ok_condition, + Label& ok ) { + assert_not_delayed(); + bp( ok_condition, true, Assembler::xcc, pt, ok); + // DELAY SLOT +} + +void InterpreterMacroAssembler::throw_if_not_1_x( Condition ok_condition, + Label& ok ) { + assert_not_delayed(); + brx(ok_condition, true, pt, ok); + // DELAY SLOT +} + +void InterpreterMacroAssembler::throw_if_not_2( address throw_entry_point, + Register Rscratch, + Label& ok ) { + assert(throw_entry_point != NULL, "entry point must be generated by now"); + AddressLiteral dest(throw_entry_point); + jump_to(dest, Rscratch); + delayed()->nop(); + bind(ok); +} + + +// And if you cannot use the delay slot, here is a shorthand: + +void InterpreterMacroAssembler::throw_if_not_icc( Condition ok_condition, + address throw_entry_point, + Register Rscratch ) { + Label ok; + if (ok_condition != never) { + throw_if_not_1_icc( ok_condition, ok); + delayed()->nop(); + } + throw_if_not_2( throw_entry_point, Rscratch, ok); +} +void InterpreterMacroAssembler::throw_if_not_xcc( Condition ok_condition, + address throw_entry_point, +
Register Rscratch ) { + Label ok; + if (ok_condition != never) { + throw_if_not_1_xcc( ok_condition, ok); + delayed()->nop(); + } + throw_if_not_2( throw_entry_point, Rscratch, ok); +} +void InterpreterMacroAssembler::throw_if_not_x( Condition ok_condition, + address throw_entry_point, + Register Rscratch ) { + Label ok; + if (ok_condition != never) { + throw_if_not_1_x( ok_condition, ok); + delayed()->nop(); + } + throw_if_not_2( throw_entry_point, Rscratch, ok); +} + +// Check that index is in range for array, then shift index by index_shift, and put arrayOop + shifted_index into res +// Note: res is still shy of address by array offset into object. + +void InterpreterMacroAssembler::index_check_without_pop(Register array, Register index, int index_shift, Register tmp, Register res) { + assert_not_delayed(); + + verify_oop(array); + // Sign extend since tos (index) can be a 32bit value. + sra(index, G0, index); + + // Check array. + Label ptr_ok; + tst(array); + throw_if_not_1_x(notZero, ptr_ok); + delayed()->ld(array, arrayOopDesc::length_offset_in_bytes(), tmp); // Check index. + throw_if_not_2(Interpreter::_throw_NullPointerException_entry, G3_scratch, ptr_ok); + + Label index_ok; + cmp(index, tmp); + throw_if_not_1_icc(lessUnsigned, index_ok); + if (index_shift > 0) { + delayed()->sll(index, index_shift, index); + } else { + delayed()->add(array, index, res); // addr - const offset in index + } + // Pass the array to create more detailed exceptions. + // Convention: move aberrant index into Otos_i for exception message. 
+ mov(index, Otos_i); + mov(array, G3_scratch); + throw_if_not_2(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry, G4_scratch, index_ok); + + // add offset if didn't do it in delay slot + if (index_shift > 0) { add(array, index, res); } // addr - const offset in index +} + + +void InterpreterMacroAssembler::index_check(Register array, Register index, int index_shift, Register tmp, Register res) { + assert_not_delayed(); + + // pop array + pop_ptr(array); + + // check array + index_check_without_pop(array, index, index_shift, tmp, res); +} + + +void InterpreterMacroAssembler::get_const(Register Rdst) { + ld_ptr(Lmethod, in_bytes(Method::const_offset()), Rdst); +} + + +void InterpreterMacroAssembler::get_constant_pool(Register Rdst) { + get_const(Rdst); + ld_ptr(Rdst, in_bytes(ConstMethod::constants_offset()), Rdst); +} + + +void InterpreterMacroAssembler::get_constant_pool_cache(Register Rdst) { + get_constant_pool(Rdst); + ld_ptr(Rdst, ConstantPool::cache_offset_in_bytes(), Rdst); +} + + +void InterpreterMacroAssembler::get_cpool_and_tags(Register Rcpool, Register Rtags) { + get_constant_pool(Rcpool); + ld_ptr(Rcpool, ConstantPool::tags_offset_in_bytes(), Rtags); +} + + +// unlock if synchronized method +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from synchronized blocks. 
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state, + bool throw_monitor_exception, + bool install_monitor_exception) { + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into G1_scratch + const Address do_not_unlock_if_synchronized(G2_thread, + JavaThread::do_not_unlock_if_synchronized_offset()); + ldbool(do_not_unlock_if_synchronized, G1_scratch); + stbool(G0, do_not_unlock_if_synchronized); // reset the flag + + // check if synchronized method + const Address access_flags(Lmethod, Method::access_flags_offset()); + interp_verify_oop(Otos_i, state, __FILE__, __LINE__); + push(state); // save tos + ld(access_flags, G3_scratch); // Load access flags. + btst(JVM_ACC_SYNCHRONIZED, G3_scratch); + br(zero, false, pt, unlocked); + delayed()->nop(); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag + // is set. + cmp_zero_and_br(Assembler::notZero, G1_scratch, no_unlock); + delayed()->nop(); + + // BasicObjectLock will be first in list, since this is a synchronized method. However, need + // to check that the object has not been unlocked by an explicit monitorexit bytecode. + + //Intel: if (throw_monitor_exception) ... else ... + // Entry already unlocked, need to throw exception + //... + + // pass top-most monitor elem + add( top_most_monitor(), O1 ); + + ld_ptr(O1, BasicObjectLock::obj_offset_in_bytes(), G3_scratch); + br_notnull_short(G3_scratch, pt, unlock); + + if (throw_monitor_exception) { + // Entry already unlocked need to throw an exception + MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. 
+ // If requested, install an illegal_monitor_state_exception. + // Continue with stack unrolling. + if (install_monitor_exception) { + MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); + } + ba_short(unlocked); + } + + bind(unlock); + + unlock_object(O1); + + bind(unlocked); + + // I0, I1: Might contain return value + + // Check that all monitors are unlocked + { Label loop, exception, entry, restart; + + Register Rmptr = O0; + Register Rtemp = O1; + Register Rlimit = Lmonitors; + const jint delta = frame::interpreter_frame_monitor_size() * wordSize; + assert( (delta & LongAlignmentMask) == 0, + "sizeof BasicObjectLock must be even number of doublewords"); + + #ifdef ASSERT + add(top_most_monitor(), Rmptr, delta); + { Label L; + // ensure that Rmptr starts out above (or at) Rlimit + cmp_and_brx_short(Rmptr, Rlimit, Assembler::greaterEqualUnsigned, pn, L); + stop("monitor stack has negative size"); + bind(L); + } + #endif + bind(restart); + ba(entry); + delayed()-> + add(top_most_monitor(), Rmptr, delta); // points to current entry, starting with bottom-most entry + + // Entry is still locked, need to throw exception + bind(exception); + if (throw_monitor_exception) { + MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and if requested, install illegal_monitor_exception. 
+ // Unlock does not block, so don't have to worry about the frame + unlock_object(Rmptr); + if (install_monitor_exception) { + MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); + } + ba_short(restart); + } + + bind(loop); + cmp(Rtemp, G0); // check if current entry is used + brx(Assembler::notEqual, false, pn, exception); + delayed()-> + dec(Rmptr, delta); // otherwise advance to next entry + #ifdef ASSERT + { Label L; + // ensure that Rmptr has not somehow stepped below Rlimit + cmp_and_brx_short(Rmptr, Rlimit, Assembler::greaterEqualUnsigned, pn, L); + stop("ran off the end of the monitor stack"); + bind(L); + } + #endif + bind(entry); + cmp(Rmptr, Rlimit); // check if bottom reached + brx(Assembler::notEqual, true, pn, loop); // if not at bottom then check this entry + delayed()-> + ld_ptr(Rmptr, BasicObjectLock::obj_offset_in_bytes() - delta, Rtemp); + } + + bind(no_unlock); + pop(state); + interp_verify_oop(Otos_i, state, __FILE__, __LINE__); +} + +void InterpreterMacroAssembler::narrow(Register result) { + + ld_ptr(Address(Lmethod, Method::const_offset()), G3_scratch); + ldub(G3_scratch, in_bytes(ConstMethod::result_type_offset()), G3_scratch); + + Label notBool, notByte, notChar, done; + + // common case first + cmp(G3_scratch, T_INT); + br(Assembler::equal, true, pn, done); + delayed()->nop(); + + cmp(G3_scratch, T_BOOLEAN); + br(Assembler::notEqual, true, pn, notBool); + delayed()->cmp(G3_scratch, T_BYTE); + and3(result, 1, result); + ba(done); + delayed()->nop(); + + bind(notBool); + // cmp(G3_scratch, T_BYTE); + br(Assembler::notEqual, true, pn, notByte); + delayed()->cmp(G3_scratch, T_CHAR); + sll(result, 24, result); + sra(result, 24, result); + ba(done); + delayed()->nop(); + + bind(notByte); + // cmp(G3_scratch, T_CHAR); + sll(result, 16, result); + br(Assembler::notEqual, true, pn, done); + delayed()->sra(result, 16, result); + // sll(result, 16, result); + srl(result, 16, result); + 
+ // bind(notChar); + // must be short, instructions already executed in delay slot + // sll(result, 16, result); + // sra(result, 16, result); + + bind(done); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from synchronized blocks. +// Remove the activation from the stack. +// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation(TosState state, + bool throw_monitor_exception, + bool install_monitor_exception) { + + unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception); + + // save result (push state before jvmti call and pop it afterwards) and notify jvmti + notify_method_exit(false, state, NotifyJVMTI); + + if (StackReservedPages > 0) { + // testing if Stack Reserved Area needs to be re-enabled + Label no_reserved_zone_enabling; + ld_ptr(G2_thread, JavaThread::reserved_stack_activation_offset(), G3_scratch); + cmp_and_brx_short(SP, G3_scratch, Assembler::lessUnsigned, Assembler::pt, no_reserved_zone_enabling); + + call_VM_leaf(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), G2_thread); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_delayed_StackOverflowError), G2_thread); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); + } + + interp_verify_oop(Otos_i, state, __FILE__, __LINE__); + verify_thread(); + + // return tos + assert(Otos_l1 == Otos_i, "adjust code below"); + switch (state) { + case ltos: mov(Otos_l, Otos_l->after_save()); break; // O0 -> I0 + case btos: // fall through + case ztos: // fall through + case ctos: + case stos: // fall through + case atos: // fall through + case itos: mov(Otos_l1, Otos_l1->after_save()); break; // O0 -> I0 + case ftos: // fall through + 
case dtos: // fall through + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +// Lock object +// +// Argument - lock_reg points to the BasicObjectLock to be used for locking, +// it must be initialized with the object to lock +void InterpreterMacroAssembler::lock_object(Register lock_reg, Register Object) { + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + } + else { + Register obj_reg = Object; + Register mark_reg = G4_scratch; + Register temp_reg = G1_scratch; + Address lock_addr(lock_reg, BasicObjectLock::lock_offset_in_bytes()); + Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes()); + Label done; + + Label slow_case; + + assert_different_registers(lock_reg, obj_reg, mark_reg, temp_reg); + + // load markWord from object into mark_reg + ld_ptr(mark_addr, mark_reg); + + if (UseBiasedLocking) { + biased_locking_enter(obj_reg, mark_reg, temp_reg, done, &slow_case); + } + + // get the address of basicLock on stack that will be stored in the object + // we need a temporary register here as we do not want to clobber lock_reg + // (cas clobbers the destination register) + mov(lock_reg, temp_reg); + // set mark reg to be (markWord of object | UNLOCK_VALUE) + or3(mark_reg, markWord::unlocked_value, mark_reg); + // initialize the box (Must happen before we update the object mark!) 
+ st_ptr(mark_reg, lock_addr, BasicLock::displaced_header_offset_in_bytes()); + // compare and exchange object_addr, markWord | 1, stack address of basicLock + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + cas_ptr(mark_addr.base(), mark_reg, temp_reg); + + // if the compare and exchange succeeded we are done (we saw an unlocked object) + cmp_and_brx_short(mark_reg, temp_reg, Assembler::equal, Assembler::pt, done); + + // We did not see an unlocked object so try the fast recursive case + + // Check if owner is self by comparing the value in the markWord of object + // with the stack pointer + sub(temp_reg, SP, temp_reg); + sub(temp_reg, STACK_BIAS, temp_reg); + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + + // Composite "andcc" test: + // (a) %sp -vs- markword proximity check, and, + // (b) verify mark word LSBs == 0 (Stack-locked). + // + // FFFFF003/FFFFFFFFFFFF003 is (markWord::lock_mask_in_place | -os::vm_page_size()) + // Note that the page size used for %sp proximity testing is arbitrary and is + // unrelated to the actual MMU page size. We use a 'logical' page size of + // 4096 bytes. F..FFF003 is designed to fit conveniently in the SIMM13 immediate + // field of the andcc instruction. + andcc (temp_reg, 0xFFFFF003, G0) ; + + // if condition is true we are done and hence we can store 0 in the displaced + // header indicating it is a recursive lock and be done + brx(Assembler::zero, true, Assembler::pt, done); + delayed()->st_ptr(G0, lock_addr, BasicLock::displaced_header_offset_in_bytes()); + + // none of the above fast optimizations worked so we have to get into the + // slow case of monitor enter + bind(slow_case); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + + bind(done); + } +} + +// Unlocks an object. Used in monitorexit bytecode and remove_activation. 
+// +// Argument - lock_reg points to the BasicObjectLock for lock +// Throw IllegalMonitorException if object is not locked by current thread +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + if (UseHeavyMonitors) { + call_VM_leaf(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Register obj_reg = G3_scratch; + Register mark_reg = G4_scratch; + Register displaced_header_reg = G1_scratch; + Address lockobj_addr(lock_reg, BasicObjectLock::obj_offset_in_bytes()); + Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes()); + Label done; + + if (UseBiasedLocking) { + // load the object out of the BasicObjectLock + ld_ptr(lockobj_addr, obj_reg); + biased_locking_exit(mark_addr, mark_reg, done, true); + st_ptr(G0, lockobj_addr); // free entry + } + + // Test first if we are in the fast recursive case + Address lock_addr(lock_reg, BasicObjectLock::lock_offset_in_bytes() + BasicLock::displaced_header_offset_in_bytes()); + ld_ptr(lock_addr, displaced_header_reg); + br_null(displaced_header_reg, true, Assembler::pn, done); + delayed()->st_ptr(G0, lockobj_addr); // free entry + + // See if it is still a light weight lock, if so we just unlock + // the object and we are done + + if (!UseBiasedLocking) { + // load the object out of the BasicObjectLock + ld_ptr(lockobj_addr, obj_reg); + } + + // we have the displaced header in displaced_header_reg + // we expect to see the stack address of the basicLock in case the + // lock is still a light weight lock (lock_reg) + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + cas_ptr(mark_addr.base(), lock_reg, displaced_header_reg); + cmp(lock_reg, displaced_header_reg); + brx(Assembler::equal, true, Assembler::pn, done); + delayed()->st_ptr(G0, lockobj_addr); // free entry + + // The lock has been converted into a heavy lock and hence + // we need to get into the slow case + + call_VM_leaf(noreg, CAST_FROM_FN_PTR(address, 
InterpreterRuntime::monitorexit), lock_reg); + + bind(done); + } +} + +// Get the method data pointer from the Method* and set the +// specified register to its value. + +void InterpreterMacroAssembler::set_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label get_continue; + + ld_ptr(Lmethod, in_bytes(Method::method_data_offset()), ImethodDataPtr); + test_method_data_pointer(get_continue); + add(ImethodDataPtr, in_bytes(MethodData::data_offset()), ImethodDataPtr); + bind(get_continue); +} + +// Set the method data pointer for the current bcp. + +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label zero_continue; + + // Test MDO to avoid the call if it is NULL. + ld_ptr(Lmethod, in_bytes(Method::method_data_offset()), ImethodDataPtr); + test_method_data_pointer(zero_continue); + call_VM_leaf(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), Lmethod, Lbcp); + add(ImethodDataPtr, in_bytes(MethodData::data_offset()), ImethodDataPtr); + add(ImethodDataPtr, O0, ImethodDataPtr); + bind(zero_continue); +} + +// Test ImethodDataPtr. If it is null, continue at the specified label + +void InterpreterMacroAssembler::test_method_data_pointer(Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + br_null_short(ImethodDataPtr, Assembler::pn, zero_continue); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + test_method_data_pointer(verify_continue); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
+ lduh(ImethodDataPtr, in_bytes(DataLayout::bci_offset()), G3_scratch); + ld_ptr(Lmethod, Method::const_offset(), O5); + add(G3_scratch, in_bytes(ConstMethod::codes_offset()), G3_scratch); + add(G3_scratch, O5, G3_scratch); + cmp(Lbcp, G3_scratch); + brx(Assembler::equal, false, Assembler::pt, verify_continue); + + Register temp_reg = O5; + delayed()->mov(ImethodDataPtr, temp_reg); + // %%% should use call_VM_leaf here? + //call_VM_leaf(noreg, ..., Lmethod, Lbcp, ImethodDataPtr); + save_frame_and_mov(sizeof(jdouble) / wordSize, Lmethod, O0, Lbcp, O1); + Address d_save(FP, -sizeof(jdouble) + STACK_BIAS); + stf(FloatRegisterImpl::D, Ftos_d, d_save); + mov(temp_reg->after_save(), O2); + save_thread(L7_thread_cache); + call(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), relocInfo::none); + delayed()->nop(); + restore_thread(L7_thread_cache); + ldf(FloatRegisterImpl::D, d_save, Ftos_d); + restore(); + bind(verify_continue); +#endif // ASSERT +} + +// Store a value at some constant offset from the method data pointer. + +void InterpreterMacroAssembler::set_mdp_data_at(int constant, Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + st_ptr(value, ImethodDataPtr, constant); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address counter, + Register bumped_count, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + + // Load the counter. + ld_ptr(counter, bumped_count); + + if (decrement) { + // Decrement the register. Set condition codes. + subcc(bumped_count, DataLayout::counter_increment, bumped_count); + + // If the decrement causes the counter to overflow, stay negative + Label L; + brx(Assembler::negative, true, Assembler::pn, L); + + // Store the decremented counter, if it is still negative. + delayed()->st_ptr(bumped_count, counter); + bind(L); + } else { + // Increment the register. Set carry flag. 
+ addcc(bumped_count, DataLayout::counter_increment, bumped_count); + + // If the increment causes the counter to overflow, pull back by 1. + assert(DataLayout::counter_increment == 1, "subc works"); + subc(bumped_count, G0, bumped_count); + + // Store the incremented counter. + st_ptr(bumped_count, counter); + } +} + +// Increment the value at some constant offset from the method data pointer. + +void InterpreterMacroAssembler::increment_mdp_data_at(int constant, + Register bumped_count, + bool decrement) { + // Locate the counter at a fixed offset from the mdp: + Address counter(ImethodDataPtr, constant); + increment_mdp_data_at(counter, bumped_count, decrement); +} + +// Increment the value at some non-fixed (reg + constant) offset from +// the method data pointer. + +void InterpreterMacroAssembler::increment_mdp_data_at(Register reg, + int constant, + Register bumped_count, + Register scratch2, + bool decrement) { + // Add the constant to reg to get the offset. + add(ImethodDataPtr, reg, scratch2); + Address counter(scratch2, constant); + increment_mdp_data_at(counter, bumped_count, decrement); +} + +// Set a flag value at the current method data pointer position. +// Updates a single byte of the header, to avoid races with other header bits. + +void InterpreterMacroAssembler::set_mdp_flag_at(int flag_constant, + Register scratch) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // Load the data header + ldub(ImethodDataPtr, in_bytes(DataLayout::flags_offset()), scratch); + + // Set the flag + or3(scratch, flag_constant, scratch); + + // Store the modified header. + stb(scratch, ImethodDataPtr, in_bytes(DataLayout::flags_offset())); +} + +// Test the location at some offset from the method data pointer. +// If it is not equal to value, branch to the not_equal_continue Label. +// Set condition codes to match the nullness of the loaded value. 
+ +void InterpreterMacroAssembler::test_mdp_data_at(int offset, + Register value, + Label& not_equal_continue, + Register scratch) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld_ptr(ImethodDataPtr, offset, scratch); + cmp(value, scratch); + brx(Assembler::notEqual, false, Assembler::pn, not_equal_continue); + delayed()->tst(scratch); +} + +// Update the method data pointer by the displacement located at some fixed +// offset from the method data pointer. + +void InterpreterMacroAssembler::update_mdp_by_offset(int offset_of_disp, + Register scratch) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld_ptr(ImethodDataPtr, offset_of_disp, scratch); + add(ImethodDataPtr, scratch, ImethodDataPtr); +} + +// Update the method data pointer by the displacement located at the +// offset (reg + offset_of_disp). + +void InterpreterMacroAssembler::update_mdp_by_offset(Register reg, + int offset_of_disp, + Register scratch) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(reg, offset_of_disp, scratch); + ld_ptr(ImethodDataPtr, scratch, scratch); + add(ImethodDataPtr, scratch, ImethodDataPtr); +} + +// Update the method data pointer by a simple constant displacement. + +void InterpreterMacroAssembler::update_mdp_by_constant(int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(ImethodDataPtr, constant, ImethodDataPtr); +} + +// Update the method data pointer for a _ret bytecode whose target +// was not among our cached targets. + +void InterpreterMacroAssembler::update_mdp_for_ret(TosState state, + Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(state); + st_ptr(return_bci, l_tmp); // protect return_bci, in case it is volatile + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), return_bci); + ld_ptr(l_tmp, return_bci); + pop(state); +} + +// Count a taken branch in the bytecodes. 
+ +void InterpreterMacroAssembler::profile_taken_branch(Register scratch, Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + // We are taking a branch. Increment the taken count. + increment_mdp_data_at(in_bytes(JumpData::taken_offset()), bumped_count); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(in_bytes(JumpData::displacement_offset()), scratch); + bind (profile_continue); + } +} + + +// Count a not-taken branch in the bytecodes. + +void InterpreterMacroAssembler::profile_not_taken_branch(Register scratch) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + // We are taking a branch. Increment the not taken count. + increment_mdp_data_at(in_bytes(BranchData::not_taken_offset()), scratch); + + // The method data pointer needs to be updated to correspond to the + // next bytecode. + update_mdp_by_constant(in_bytes(BranchData::branch_data_size())); + bind (profile_continue); + } +} + + +// Count a non-virtual call in the bytecodes. + +void InterpreterMacroAssembler::profile_call(Register scratch) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(in_bytes(CounterData::counter_data_size())); + bind (profile_continue); + } +} + + +// Count a final call in the bytecodes. 
+ +void InterpreterMacroAssembler::profile_final_call(Register scratch) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size())); + bind (profile_continue); + } +} + + +// Count a virtual call in the bytecodes. + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register scratch, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + br_notnull_short(receiver, Assembler::pt, not_null); + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch); + ba_short(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, scratch, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. 
+    update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::record_klass_in_profile_helper(Register receiver, Register scratch,
+                                                               Label& done, bool is_virtual_call) {
+  if (TypeProfileWidth == 0) {
+    if (is_virtual_call) {
+      increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
+    }
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      increment_mdp_data_at(in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()), scratch);
+    }
+#endif
+  } else {
+    int non_profiled_offset = -1;
+    if (is_virtual_call) {
+      non_profiled_offset = in_bytes(CounterData::count_offset());
+    }
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset());
+    }
+#endif
+
+    record_item_in_profile_helper(receiver, scratch, 0, done, TypeProfileWidth,
+      &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
+  }
+}
+
+void InterpreterMacroAssembler::record_item_in_profile_helper(Register item,
+                                          Register scratch, int start_row, Label& done, int total_rows,
+                                          OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                          int non_profiled_offset) {
+  int last_row = total_rows - 1;
+  assert(start_row <= last_row, "must be work left to do");
+  // Test this row for both the item and for null.
+  // Take any of three different outcomes:
+  //   1. found item => increment count and goto done
+  //   2. found null => keep looking for case 1, maybe allocate this cell
+  //   3. found something else => keep looking for cases 1 and 2
+  // Case 3 is handled by a recursive call.
+  for (int row = start_row; row <= last_row; row++) {
+    Label next_test;
+    bool test_for_null_also = (row == start_row);
+
+    // See if the item is item[n].
+    int item_offset = in_bytes(item_offset_fn(row));
+    test_mdp_data_at(item_offset, item, next_test, scratch);
+    // delayed()->tst(scratch);
+
+    // The item matches item[n]. Increment count[n].
+    int count_offset = in_bytes(item_count_offset_fn(row));
+    increment_mdp_data_at(count_offset, scratch);
+    ba_short(done);
+    bind(next_test);
+
+    if (test_for_null_also) {
+      Label found_null;
+      // Failed the equality check on item[n]... Test for null.
+      if (start_row == last_row) {
+        // The only thing left to do is handle the null case.
+        if (non_profiled_offset >= 0) {
+          brx(Assembler::zero, false, Assembler::pn, found_null);
+          delayed()->nop();
+          // Item did not match any saved item and there is no empty row for it.
+          // Increment total counter to indicate polymorphic case.
+          increment_mdp_data_at(non_profiled_offset, scratch);
+          ba_short(done);
+          bind(found_null);
+        } else {
+          brx(Assembler::notZero, false, Assembler::pt, done);
+          delayed()->nop();
+        }
+        break;
+      }
+      // Since null is rare, make it be the branch-taken case.
+      brx(Assembler::zero, false, Assembler::pn, found_null);
+      delayed()->nop();
+
+      // Put all the "Case 3" tests here.
+      record_item_in_profile_helper(item, scratch, start_row + 1, done, total_rows,
+        item_offset_fn, item_count_offset_fn, non_profiled_offset);
+
+      // Found a null. Keep searching for a matching item,
+      // but remember that this is an empty (unused) slot.
+      bind(found_null);
+    }
+  }
+
+  // In the fall-through case, we found no matching item, but we
+  // observed that item[start_row] is NULL.
+
+  // Fill in the item field and increment the count.
+  int item_offset = in_bytes(item_offset_fn(start_row));
+  set_mdp_data_at(item_offset, item);
+  int count_offset = in_bytes(item_count_offset_fn(start_row));
+  mov(DataLayout::counter_increment, scratch);
+  set_mdp_data_at(count_offset, scratch);
+  if (start_row > 0) {
+    ba_short(done);
+  }
+}
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
+                                                        Register scratch, bool is_virtual_call) {
+  assert(ProfileInterpreter, "must be profiling");
+  Label done;
+
+  record_klass_in_profile_helper(receiver, scratch, done, is_virtual_call);
+
+  bind (done);
+}
+
+
+// Count a ret in the bytecodes.
+
+void InterpreterMacroAssembler::profile_ret(TosState state,
+                                            Register return_bci,
+                                            Register scratch) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+    uint row;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(profile_continue);
+
+    // Update the total ret count.
+    increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
+
+    for (row = 0; row < RetData::row_limit(); row++) {
+      Label next_test;
+
+      // See if return_bci is equal to bci[n]:
+      test_mdp_data_at(in_bytes(RetData::bci_offset(row)),
+                       return_bci, next_test, scratch);
+
+      // return_bci is equal to bci[n]. Increment the count.
+      increment_mdp_data_at(in_bytes(RetData::bci_count_offset(row)), scratch);
+
+      // The method data pointer needs to be updated to reflect the new target.
+      update_mdp_by_offset(in_bytes(RetData::bci_displacement_offset(row)), scratch);
+      ba_short(profile_continue);
+      bind(next_test);
+    }
+
+    update_mdp_for_ret(state, return_bci);
+
+    bind (profile_continue);
+  }
+}
+
+// Profile an unexpected null in the bytecodes.
+void InterpreterMacroAssembler::profile_null_seen(Register scratch) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(profile_continue);
+
+    set_mdp_flag_at(BitData::null_seen_byte_constant(), scratch);
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size());
+    }
+    update_mdp_by_constant(mdp_delta);
+
+    bind (profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_typecheck(Register klass,
+                                                  Register scratch) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(profile_continue);
+
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size());
+
+      // Record the object type.
+      record_klass_in_profile(klass, scratch, false);
+    }
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_constant(mdp_delta);
+
+    bind (profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_typecheck_failed(Register scratch) {
+  if (ProfileInterpreter && TypeProfileCasts) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(profile_continue);
+
+    int count_offset = in_bytes(CounterData::count_offset());
+    // Back up the address, since we have already bumped the mdp.
+    count_offset -= in_bytes(ReceiverTypeData::receiver_type_data_size());
+
+    // *Decrement* the counter (the trailing 'true' presumably selects decrement - confirm against increment_mdp_data_at). We expect to see zero or small negatives.
+    increment_mdp_data_at(count_offset, scratch, true);
+
+    bind (profile_continue);
+  }
+}
+
+// Count the default case of a switch construct.
+
+void InterpreterMacroAssembler::profile_switch_default(Register scratch) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(profile_continue);
+
+    // Update the default case count
+    increment_mdp_data_at(in_bytes(MultiBranchData::default_count_offset()),
+                          scratch);
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(
+                    in_bytes(MultiBranchData::default_displacement_offset()),
+                    scratch);
+
+    bind (profile_continue);
+  }
+}
+
+// Count the index'th case of a switch construct.
+
+void InterpreterMacroAssembler::profile_switch_case(Register index,
+                                                    Register scratch,
+                                                    Register scratch2,
+                                                    Register scratch3) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(profile_continue);
+
+    // Build the base: (index * per_case_size()) + case_array_offset(), both taken in bytes
+    set(in_bytes(MultiBranchData::per_case_size()), scratch);
+    smul(index, scratch, scratch);
+    add(scratch, in_bytes(MultiBranchData::case_array_offset()), scratch);
+
+    // Update the case count
+    increment_mdp_data_at(scratch,
+                          in_bytes(MultiBranchData::relative_count_offset()),
+                          scratch2,
+                          scratch3);
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(scratch,
+                     in_bytes(MultiBranchData::relative_displacement_offset()),
+                     scratch2);
+
+    bind (profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
+  Label not_null, do_nothing, do_update;
+
+  assert_different_registers(obj, mdo_addr.base(), tmp);
+
+  verify_oop(obj);
+
+  ld_ptr(mdo_addr, tmp);
+
+  br_notnull_short(obj, pt, not_null);
+  or3(tmp, TypeEntries::null_seen, tmp);
+  ba_short(do_update);
+
+  bind(not_null);
+  load_klass(obj, obj);
+
+  xor3(obj, tmp, obj);
+  btst(TypeEntries::type_klass_mask, obj);
+  // klass seen before, nothing to do. The unknown bit may have been
+  // set already but no need to check.
+  brx(zero, false, pt, do_nothing);
+  delayed()->
+
+  btst(TypeEntries::type_unknown, obj);
+  // already unknown. Nothing to do anymore.
+  brx(notZero, false, pt, do_nothing);
+  delayed()->
+
+  btst(TypeEntries::type_mask, tmp);
+  brx(zero, true, pt, do_update);
+  // first time here. Set profile type.
+  delayed()->or3(tmp, obj, tmp);
+
+  // different than before. Cannot keep accurate profile.
+  or3(tmp, TypeEntries::type_unknown, tmp);
+
+  bind(do_update);
+  // update profile
+  st_ptr(tmp, mdo_addr);
+
+  bind(do_nothing);
+}
+
+void InterpreterMacroAssembler::profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  assert_different_registers(callee, tmp1, tmp2, ImethodDataPtr);
+
+  if (MethodData::profile_arguments() || MethodData::profile_return()) {
+    Label profile_continue;
+
+    test_method_data_pointer(profile_continue);
+
+    int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+
+    ldub(ImethodDataPtr, in_bytes(DataLayout::tag_offset()) - off_to_start, tmp1);
+    cmp_and_br_short(tmp1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag, notEqual, pn, profile_continue);
+
+    if (MethodData::profile_arguments()) {
+      Label done;
+      int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
+      add(ImethodDataPtr, off_to_args, ImethodDataPtr);
+
+      for (int i = 0; i < TypeProfileArgsLimit; i++) {
+        if (i > 0 || MethodData::profile_return()) {
+          // If return value type is profiled we may have no argument to profile
+          ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
+          sub(tmp1, i*TypeStackSlotEntries::per_arg_count(), tmp1);
+          cmp_and_br_short(tmp1, TypeStackSlotEntries::per_arg_count(), less, pn, done);
+        }
+        ld_ptr(Address(callee, Method::const_offset()), tmp1);
+        lduh(Address(tmp1, ConstMethod::size_of_parameters_offset()), tmp1);
+        // stack offset o (zero based) from the start of the argument
+        // list, for n arguments translates into offset n - o - 1 from
+        // the end of the argument list. But there's an extra slot at
+        // the top of the stack. So the offset is n - o from Lesp.
+        ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args, tmp2);
+        sub(tmp1, tmp2, tmp1);
+
+        // Can't use MacroAssembler::argument_address() which needs Gargs to be set up
+        sll(tmp1, Interpreter::logStackElementSize, tmp1);
+        ld_ptr(Lesp, tmp1, tmp1);
+
+        Address mdo_arg_addr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
+        profile_obj_type(tmp1, mdo_arg_addr, tmp2);
+
+        int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+        add(ImethodDataPtr, to_add, ImethodDataPtr);
+        off_to_args += to_add;
+      }
+
+      if (MethodData::profile_return()) {
+        ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
+        sub(tmp1, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp1);
+      }
+
+      bind(done);
+
+      if (MethodData::profile_return()) {
+        // We're right after the type profile for the last
+        // argument. tmp1 is the number of cells left in the
+        // CallTypeData/VirtualCallTypeData to reach its end. Non-zero
+        // if there's a return to profile.
+        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+        sll(tmp1, exact_log2(DataLayout::cell_size), tmp1);
+        add(ImethodDataPtr, tmp1, ImethodDataPtr);
+      }
+    } else {
+      assert(MethodData::profile_return(), "either profile call args or call ret");
+      update_mdp_by_constant(in_bytes(TypeEntriesAtCall::return_only_size()));
+    }
+
+    // mdp points right after the end of the
+    // CallTypeData/VirtualCallTypeData, right after the cells for the
+    // return value type if there's one.
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, Register tmp2) {
+  assert_different_registers(ret, tmp1, tmp2);
+  if (ProfileInterpreter && MethodData::profile_return()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(profile_continue);
+
+    if (MethodData::profile_return_jsr292_only()) {
+      assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
+
+      // If we don't profile all invoke bytecodes we must make sure
+      // it's a bytecode we indeed profile. We can't go back to the
+      // beginning of the ProfileData we intend to update to check its
+      // type because we're right after it and we don't know its
+      // length.
+      Label do_profile;
+      ldub(Lbcp, 0, tmp1);
+      cmp_and_br_short(tmp1, Bytecodes::_invokedynamic, equal, pn, do_profile);
+      cmp(tmp1, Bytecodes::_invokehandle);
+      br(equal, false, pn, do_profile);
+      delayed()->lduh(Lmethod, Method::intrinsic_id_offset_in_bytes(), tmp1);
+      cmp_and_br_short(tmp1, static_cast<int>(vmIntrinsics::_compiledLambdaForm), notEqual, pt, profile_continue);
+
+      bind(do_profile);
+    }
+
+    Address mdo_ret_addr(ImethodDataPtr, -in_bytes(ReturnTypeEntry::size()));
+    mov(ret, tmp1);
+    profile_obj_type(tmp1, mdo_ret_addr, tmp2);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(profile_continue);
+
+    // Load the offset of the area within the MDO used for
+    // parameters. If it's negative we're not profiling any parameters.
+    lduw(ImethodDataPtr, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()), tmp1);
+    cmp_and_br_short(tmp1, 0, less, pn, profile_continue);
+
+    // Compute a pointer to the area for parameters from the offset
+    // and move the pointer to the slot for the last
+    // parameter. Collect profiling from last parameter down.
+    // mdo start + parameters offset + array length - 1
+
+    // Pointer to the parameter area in the MDO
+    Register mdp = tmp1;
+    add(ImethodDataPtr, tmp1, mdp);
+
+    // offset of the current profile entry to update
+    Register entry_offset = tmp2;
+    // entry_offset = array len in number of cells
+    ld_ptr(mdp, ArrayData::array_len_offset(), entry_offset);
+
+    int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
+    assert(off_base % DataLayout::cell_size == 0, "should be a number of cells");
+
+    // entry_offset (number of cells) = array len - size of 1 entry + offset of the stack slot field
+    sub(entry_offset, TypeStackSlotEntries::per_arg_count() - (off_base / DataLayout::cell_size), entry_offset);
+    // entry_offset in bytes
+    sll(entry_offset, exact_log2(DataLayout::cell_size), entry_offset);
+
+    Label loop;
+    bind(loop);
+
+    // load offset on the stack from the slot for this parameter
+    ld_ptr(mdp, entry_offset, tmp3);
+    sll(tmp3,Interpreter::logStackElementSize, tmp3);
+    neg(tmp3);
+    // read the parameter from the local area
+    ld_ptr(Llocals, tmp3, tmp3);
+
+    // make entry_offset now point to the type field for this parameter
+    int type_base = in_bytes(ParametersTypeData::type_offset(0));
+    assert(type_base > off_base, "unexpected");
+    add(entry_offset, type_base - off_base, entry_offset);
+
+    // profile the parameter
+    Address arg_type(mdp, entry_offset);
+    profile_obj_type(tmp3, arg_type, tmp4);
+
+    // go to next parameter
+    sub(entry_offset, TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size + (type_base - off_base), entry_offset);
+    cmp_and_br_short(entry_offset, off_base, greaterEqual, pt, loop);
+
+    bind(profile_continue);
+  }
+}
+
+// add an InterpMonitorElem to stack (see frame_sparc.hpp)
+
+void InterpreterMacroAssembler::add_monitor_to_stack( bool stack_is_empty,
+                                                      Register Rtemp,
+                                                      Register Rtemp2 ) {
+
+  Register Rlimit = Lmonitors;
+  const jint delta = frame::interpreter_frame_monitor_size() * wordSize;
+  assert( (delta & LongAlignmentMask) == 0,
+          "sizeof BasicObjectLock must be even number of doublewords");
+
+  sub( SP, delta, SP);
+  sub( Lesp, delta, Lesp);
+  sub( Lmonitors, delta, Lmonitors);
+
+  if (!stack_is_empty) {
+
+    // must copy stack contents down
+
+    Label start_copying, next;
+
+    // untested("monitor stack expansion");
+    compute_stack_base(Rtemp);
+    ba(start_copying);
+    delayed()->cmp(Rtemp, Rlimit); // done? duplicated below
+
+    // note: must copy from low memory upwards
+    // On entry to loop,
+    // Rtemp points to new base of stack, Lesp points to new end of stack (1 past TOS)
+    // Loop mutates Rtemp
+
+    bind( next);
+
+    st_ptr(Rtemp2, Rtemp, 0);
+    inc(Rtemp, wordSize);
+    cmp(Rtemp, Rlimit); // are we done? (duplicated above)
+
+    bind( start_copying );
+
+    brx( notEqual, true, pn, next );
+    delayed()->ld_ptr( Rtemp, delta, Rtemp2 );
+
+    // done copying stack
+  }
+}
+
+// Locals: each accessor turns index into the address Llocals - (index << Interpreter::logStackElementSize)
+void InterpreterMacroAssembler::access_local_ptr( Register index, Register dst ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+  ld_ptr(index, 0, dst);
+  // Note: index must hold the effective address--the iinc template uses it
+}
+
+// Just like access_local_ptr but the tag is a returnAddress
+void InterpreterMacroAssembler::access_local_returnAddress(Register index,
+                                                           Register dst ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+  ld_ptr(index, 0, dst);
+}
+
+void InterpreterMacroAssembler::access_local_int( Register index, Register dst ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+  ld(index, 0, dst);
+  // Note: index must hold the effective address--the iinc template uses it
+}
+
+
+void InterpreterMacroAssembler::access_local_long( Register index, Register dst ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+  // First half stored at index n+1 (which grows down from Llocals[n])
+  load_unaligned_long(index, Interpreter::local_offset_in_bytes(1), dst);
+}
+
+
+void InterpreterMacroAssembler::access_local_float( Register index, FloatRegister dst ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+  ldf(FloatRegisterImpl::S, index, 0, dst);
+}
+
+
+void InterpreterMacroAssembler::access_local_double( Register index, FloatRegister dst ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+  load_unaligned_double(index, Interpreter::local_offset_in_bytes(1), dst);
+}
+
+
+#ifdef ASSERT
+void InterpreterMacroAssembler::check_for_regarea_stomp(Register Rindex, int offset, Register Rlimit, Register Rscratch, Register Rscratch1) {
+  Label L;
+
+  assert(Rindex != Rscratch, "Registers cannot be same");
+  assert(Rindex != Rscratch1, "Registers cannot be same");
+  assert(Rlimit != Rscratch, "Registers cannot be same");
+  assert(Rlimit != Rscratch1, "Registers cannot be same");
+  assert(Rscratch1 != Rscratch, "Registers cannot be same");
+
+  // untested("reg area corruption");
+  add(Rindex, offset, Rscratch);
+  add(Rlimit, 64 + STACK_BIAS, Rscratch1);
+  cmp_and_brx_short(Rscratch, Rscratch1, Assembler::greaterEqualUnsigned, pn, L);
+  stop("regsave area is being clobbered");
+  bind(L);
+}
+#endif // ASSERT
+
+
+void InterpreterMacroAssembler::store_local_int( Register index, Register src ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+  debug_only(check_for_regarea_stomp(index, 0, FP, G1_scratch, G4_scratch);)
+  st(src, index, 0);
+}
+
+void InterpreterMacroAssembler::store_local_ptr( Register index, Register src ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+#ifdef ASSERT
+  check_for_regarea_stomp(index, 0, FP, G1_scratch, G4_scratch);
+#endif
+  st_ptr(src, index, 0);
+}
+
+
+
+void InterpreterMacroAssembler::store_local_ptr( int n, Register src ) {
+  st_ptr(src, Llocals, Interpreter::local_offset_in_bytes(n));
+}
+
+void InterpreterMacroAssembler::store_local_long( Register index, Register src ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+#ifdef ASSERT
+  check_for_regarea_stomp(index, Interpreter::local_offset_in_bytes(1), FP, G1_scratch, G4_scratch);
+#endif
+  store_unaligned_long(src, index, Interpreter::local_offset_in_bytes(1)); // slot n+1, same offset access_local_long reads from
+}
+
+
+void InterpreterMacroAssembler::store_local_float( Register index, FloatRegister src ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+#ifdef ASSERT
+  check_for_regarea_stomp(index, 0, FP, G1_scratch, G4_scratch);
+#endif
+  stf(FloatRegisterImpl::S, src, index, 0);
+}
+
+
+void InterpreterMacroAssembler::store_local_double( Register index, FloatRegister src ) {
+  assert_not_delayed();
+  sll(index, Interpreter::logStackElementSize, index);
+  sub(Llocals, index, index);
+#ifdef ASSERT
+  check_for_regarea_stomp(index, Interpreter::local_offset_in_bytes(1), FP, G1_scratch, G4_scratch);
+#endif
+  store_unaligned_double(src, index, Interpreter::local_offset_in_bytes(1));
+}
+
+
+int InterpreterMacroAssembler::top_most_monitor_byte_offset() {
+  const jint delta = frame::interpreter_frame_monitor_size() * wordSize;
+  int rounded_vm_local_words = align_up((int)frame::interpreter_frame_vm_local_words, WordsPerLong);
+  return ((-rounded_vm_local_words * wordSize) - delta ) + STACK_BIAS;
+}
+
+
+Address InterpreterMacroAssembler::top_most_monitor() {
+  return Address(FP, top_most_monitor_byte_offset());
+}
+
+
+void InterpreterMacroAssembler::compute_stack_base( Register Rdest ) {
+  add( Lesp, wordSize, Rdest );
+}
+
+void InterpreterMacroAssembler::get_method_counters(Register method,
+                                                    Register Rcounters,
+                                                    Label& skip) {
+  Label has_counters;
+  Address method_counters(method, in_bytes(Method::method_counters_offset()));
+  ld_ptr(method_counters, Rcounters);
+  br_notnull_short(Rcounters, Assembler::pt, has_counters);
+  call_VM(noreg, CAST_FROM_FN_PTR(address,
+          InterpreterRuntime::build_method_counters), method);
+  ld_ptr(method_counters, Rcounters);
+  br_null(Rcounters, false, Assembler::pn, skip); // Still no MethodCounters after build_method_counters (OutOfMemory): go to skip
+  delayed()->nop();
+  bind(has_counters);
+}
+
+void InterpreterMacroAssembler::increment_invocation_counter( Register Rcounters, Register Rtmp, Register Rtmp2 ) {
+  assert(UseCompiler || LogTouchedMethods, "incrementing must be useful");
+  assert_different_registers(Rcounters, Rtmp, Rtmp2);
+
+  Address inv_counter(Rcounters, MethodCounters::invocation_counter_offset() +
+                                 InvocationCounter::counter_offset());
+  Address be_counter (Rcounters, MethodCounters::backedge_counter_offset() +
+                                 InvocationCounter::counter_offset());
+  int delta = InvocationCounter::count_increment;
+
+  // Load each counter in a register
+  ld( inv_counter, Rtmp );
+  ld( be_counter, Rtmp2 );
+
+  assert( is_simm13( delta ), " delta too large.");
+
+  // Add the delta to the invocation counter (it is stored back below)
+  add( Rtmp, delta, Rtmp );
+
+  // Mask the backedge counter
+  and3( Rtmp2, InvocationCounter::count_mask_value, Rtmp2 );
+
+  // Store the incremented invocation counter
+  st( Rtmp, inv_counter);
+
+  // Add invocation counter + backedge counter
+  add( Rtmp, Rtmp2, Rtmp);
+
+  // Note that this macro must leave the backedge_count + invocation_count in Rtmp!
+}
+
+void InterpreterMacroAssembler::increment_backedge_counter( Register Rcounters, Register Rtmp, Register Rtmp2 ) {
+  assert(UseCompiler, "incrementing must be useful");
+  assert_different_registers(Rcounters, Rtmp, Rtmp2);
+
+  Address be_counter (Rcounters, MethodCounters::backedge_counter_offset() +
+                                 InvocationCounter::counter_offset());
+  Address inv_counter(Rcounters, MethodCounters::invocation_counter_offset() +
+                                 InvocationCounter::counter_offset());
+
+  int delta = InvocationCounter::count_increment;
+  // Load each counter in a register
+  ld( be_counter, Rtmp );
+  ld( inv_counter, Rtmp2 );
+
+  // Add the delta to the backedge counter
+  add( Rtmp, delta, Rtmp );
+
+  // Mask the invocation counter (it is added to the backedge counter below)
+  and3( Rtmp2, InvocationCounter::count_mask_value, Rtmp2 );
+
+  // Store the incremented backedge counter to memory
+  st( Rtmp, be_counter );
+
+  // Add backedge + invocation counter
+  add( Rtmp, Rtmp2, Rtmp );
+
+  // Note that this macro must leave backedge_count + invocation_count in Rtmp!
+}
+
+void InterpreterMacroAssembler::interp_verify_oop(Register reg, TosState state, const char * file, int line) {
+  if (state == atos) { MacroAssembler::_verify_oop(reg, "broken oop ", file, line); }
+}
+
+
+// local helper for verify_oop_or_return_address: true if bci is inside m and directly follows a jsr/jsr_w (always false in PRODUCT builds)
+static bool verify_return_address(Method* m, int bci) {
+#ifndef PRODUCT
+  address pc = (address)(m->constMethod())
+             + in_bytes(ConstMethod::codes_offset()) + bci;
+  // assume it is a valid return address if it is inside m and is preceded by a jsr
+  if (!m->contains(pc))                                          return false;
+  address jsr_pc;
+  jsr_pc = pc - Bytecodes::length_for(Bytecodes::_jsr);
+  if (jsr_pc >= m->code_base() && *jsr_pc == Bytecodes::_jsr)    return true;  // bounds check first, then read the opcode
+  jsr_pc = pc - Bytecodes::length_for(Bytecodes::_jsr_w);
+  if (jsr_pc >= m->code_base() && *jsr_pc == Bytecodes::_jsr_w)  return true;  // bounds check first, then read the opcode
+#endif // PRODUCT
+  return false;
+}
+
+
+void InterpreterMacroAssembler::verify_oop_or_return_address(Register reg, Register Rtmp) {
+  if (!VerifyOops)  return;
+  // the VM documentation for the astore[_wide] bytecode allows
+  // the TOS to be not only an oop but also a return address
+  Label test;
+  Label skip;
+  // See if it is an address (in the current method):
+
+  mov(reg, Rtmp);
+  const int log2_bytecode_size_limit = 16;
+  srl(Rtmp, log2_bytecode_size_limit, Rtmp);
+  br_notnull_short( Rtmp, pt, test );
+
+  // %%% should use call_VM_leaf here?
+  save_frame_and_mov(0, Lmethod, O0, reg, O1);
+  save_thread(L7_thread_cache);
+  call(CAST_FROM_FN_PTR(address,verify_return_address), relocInfo::none);
+  delayed()->nop();
+  restore_thread(L7_thread_cache);
+  br_notnull( O0, false, pt, skip );
+  delayed()->restore();
+
+  // Perform a more elaborate out-of-line call
+  // Not an address; verify it:
+  bind(test);
+  verify_oop(reg);
+  bind(skip);
+}
+
+
+// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
+                                                        int increment, Address mask_addr,
+                                                        Register scratch1, Register scratch2,
+                                                        Condition cond, Label *where) {
+  ld(counter_addr, scratch1);
+  add(scratch1, increment, scratch1);
+  ld(mask_addr, scratch2);
+  andcc(scratch1, scratch2,  G0);
+  br(cond, false, Assembler::pn, *where);
+  delayed()->st(scratch1, counter_addr);
+}
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+//   InterpreterRuntime::post_method_entry();
+// }
+// if (DTraceMethodProbes) {
+//   SharedRuntime::dtrace_method_entry(thread, method);
+// }
+// if (log_is_enabled(Trace, redefine, class, obsolete)) {
+//   SharedRuntime::rc_trace_method_entry(thread, method);
+// }
+
+void InterpreterMacroAssembler::notify_method_entry() {
+
+  // Whenever JVMTI puts a thread in interp_only_mode, method
+  // entry/exit events are sent for that thread to track stack
+  // depth. If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label L;
+    Register temp_reg = O5;
+    const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
+    ld(interp_only, temp_reg);
+    cmp_and_br_short(temp_reg, 0, equal, pt, L);
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry));
+    bind(L);
+  }
+
+  {
+    Register temp_reg = O5;
+    SkipIfEqual skip_if(this, temp_reg, &DTraceMethodProbes, zero);
+    call_VM_leaf(noreg,
+      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+      G2_thread, Lmethod);
+  }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (log_is_enabled(Trace, redefine, class, obsolete)) {
+    call_VM_leaf(noreg,
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      G2_thread, Lmethod);
+  }
+}
+
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+//   // save result
+//   InterpreterRuntime::post_method_exit();
+//   // restore result
+// }
+// if (DTraceMethodProbes) {
+//   SharedRuntime::dtrace_method_exit(thread, method);
+// }
+//
+// Native methods have their result stored in d_tmp and l_tmp
+// Java methods have their result stored in the expression stack
+
+void InterpreterMacroAssembler::notify_method_exit(bool is_native_method,
+                                                   TosState state,
+                                                   NotifyMethodExitMode mode) {
+
+  // Whenever JVMTI puts a thread in interp_only_mode, method
+  // entry/exit events are sent for that thread to track stack
+  // depth. If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+    Label L;
+    Register temp_reg = O5;
+    const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
+    ld(interp_only, temp_reg);
+    cmp_and_br_short(temp_reg, 0, equal, pt, L);
+
+    // Note: frame::interpreter_frame_result has a dependency on how the
+    // method result is saved across the call to post_method_exit. For
+    // native methods it assumes the result registers are saved to
+    // l_scratch and d_scratch (save_return_value() stores O0/F0 through l_tmp/d_tmp, presumably those slots - TODO confirm). If this changes then the interpreter_frame_result
+    // implementation will need to be updated too.
+
+    save_return_value(state, is_native_method);
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+    restore_return_value(state, is_native_method);
+    bind(L);
+  }
+
+  {
+    Register temp_reg = O5;
+    // Dtrace notification
+    SkipIfEqual skip_if(this, temp_reg, &DTraceMethodProbes, zero);
+    save_return_value(state, is_native_method);
+    call_VM_leaf(
+      noreg,
+      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+      G2_thread, Lmethod);
+    restore_return_value(state, is_native_method);
+  }
+}
+
+void InterpreterMacroAssembler::save_return_value(TosState state, bool is_native_call) {
+  if (is_native_call) {
+    stf(FloatRegisterImpl::D, F0, d_tmp);
+    stx(O0, l_tmp);
+  } else {
+    push(state);
+  }
+}
+
+void InterpreterMacroAssembler::restore_return_value( TosState state, bool is_native_call) {
+  if (is_native_call) {
+    ldf(FloatRegisterImpl::D, d_tmp, F0);
+    ldx(l_tmp, O0);
+  } else {
+    pop(state);
+  }
+}
diff -ur --new-file a/src/hotspot/cpu/sparc/interp_masm_sparc.hpp b/src/hotspot/cpu/sparc/interp_masm_sparc.hpp
--- a/src/hotspot/cpu/sparc/interp_masm_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/interp_masm_sparc.hpp 2023-04-16 11:42:11.065022540 +0000
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_INTERP_MASM_SPARC_HPP +#define CPU_SPARC_INTERP_MASM_SPARC_HPP + +#include "asm/macroAssembler.hpp" +#include "interpreter/invocationCounter.hpp" + +// This file specializes the assembler with interpreter-specific macros + +// Signature of the offset accessors handed to record_item_in_profile_helper +typedef ByteSize (*OffsetFunction)(uint); + +REGISTER_DECLARATION( Register, Otos_i , O0); // tos for ints, etc +REGISTER_DECLARATION( Register, Otos_l , O0); // for longs +REGISTER_DECLARATION( Register, Otos_l1, O0); // for 1st part of longs +REGISTER_DECLARATION( Register, Otos_l2, O1); // for 2nd part of longs +REGISTER_DECLARATION(FloatRegister, Ftos_f , F0); // for floats +REGISTER_DECLARATION(FloatRegister, Ftos_d , F0); // for doubles +REGISTER_DECLARATION(FloatRegister, Ftos_d1, F0); // for 1st part of double +REGISTER_DECLARATION(FloatRegister, Ftos_d2, F1); // for 2nd part of double + +// MacroAssembler extended with the interpreter-specific helpers declared below +// (dispatch, expression stack, locals, profiling, locking, JVMTI/Dtrace support). +class InterpreterMacroAssembler: public MacroAssembler { + protected: + // Interpreter specific versions of call_VM_leaf_base and call_VM_base + virtual void call_VM_leaf_base( + Register java_thread, + address entry_point, + int number_of_arguments + ); + + virtual void call_VM_base( + Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exception=true + ); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table); + + public: + InterpreterMacroAssembler(CodeBuffer* c) + : MacroAssembler(c) {} + + virtual void 
check_and_handle_popframe(Register scratch_reg); + virtual void check_and_handle_earlyret(Register scratch_reg); + + void jump_to_entry(address entry); + + virtual void load_earlyret_value(TosState state); + + static const Address l_tmp ; + static const Address d_tmp ; + + // helper routine for frame allocation/deallocation + // compute the delta by which the caller's SP has to + // be adjusted to accommodate for the non-argument + // locals + void compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta); + + // dispatch routines + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state); + void dispatch_normal(TosState state); + void dispatch_next(TosState state, int step = 0, bool generate_poll = false); + void dispatch_next_noverify_oop(TosState state, int step = 0); + void dispatch_via (TosState state, address* table); + + + void narrow(Register result); + + // Removes the current activation (incl. unlocking of monitors). + // Additionally this code is used for earlyReturn in which case we + // want to skip throwing an exception and installing an exception. + void remove_activation(TosState state, + bool throw_monitor_exception = true, + bool install_monitor_exception = true); + + protected: + void dispatch_Lbyte_code(TosState state, address* table, int bcp_incr = 0, bool verify = true, bool generate_poll = false); + + public: + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void super_call_VM(Register thread_cache, + Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exception = true); + + void super_call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. Blows registers tmp1, tmp2 and tmp3. 
+ void gen_subtype_check( Register sub_klass, Register super_klass, Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype ); + + // helpers for tossing exceptions + void throw_if_not_1_icc( Condition ok_condition, Label& ok ); + void throw_if_not_1_xcc( Condition ok_condition, Label& ok ); + void throw_if_not_1_x ( Condition ok_condition, Label& ok ); // chooses icc or xcc based on _LP64 + + void throw_if_not_2( address throw_entry_point, Register Rscratch, Label& ok); + + void throw_if_not_icc( Condition ok_condition, address throw_entry_point, Register Rscratch ); + void throw_if_not_xcc( Condition ok_condition, address throw_entry_point, Register Rscratch ); + void throw_if_not_x ( Condition ok_condition, address throw_entry_point, Register Rscratch ); + + // helpers for expression stack + + void pop_i( Register r = Otos_i); + void pop_ptr( Register r = Otos_i, Register scratch = O4); + void pop_l( Register r = Otos_l1); + // G4_scratch and Lscratch are used at call sites!! 
+ void pop_f(FloatRegister f = Ftos_f, Register scratch = G1_scratch); + void pop_d(FloatRegister f = Ftos_d1, Register scratch = G1_scratch); + + void push_i( Register r = Otos_i); + void push_ptr( Register r = Otos_i); + void push_l( Register r = Otos_l1); + void push_f(FloatRegister f = Ftos_f); + void push_d(FloatRegister f = Ftos_d1); + + + void pop (TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + void empty_expression_stack(); // resets both Lesp and SP + +#ifdef ASSERT + void verify_sp(Register Rsp, Register Rtemp); + void verify_esp(Register Resp); // verify that Lesp points to a word in the temp stack +#endif // ASSERT + + public: + void if_cmp(Condition cc, bool ptr_compare); + + // Load values from bytecode stream: + + enum signedOrNot { Signed, Unsigned }; + enum setCCOrNot { set_CC, dont_set_CC }; + + void get_2_byte_integer_at_bcp( int bcp_offset, + Register Rtmp, + Register Rdst, + signedOrNot is_signed, + setCCOrNot should_set_CC = dont_set_CC ); + + void get_4_byte_integer_at_bcp( int bcp_offset, + Register Rtmp, + Register Rdst, + setCCOrNot should_set_CC = dont_set_CC ); + + // Note: "get_cache_and_index" really means "get the index, use it to get the cache entry, and throw away the index". + void get_cache_and_index_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register temp, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + // Note: This one does not fetch the cache. The first argument is a temp which may be killed. 
+ void get_cache_index_at_bcp(Register temp, Register index, int bcp_offset, size_t index_size = sizeof(u2)); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index, Register tmp); + + // load cpool->resolved_klass_at(index) + void load_resolved_klass_at_offset(Register Rcpool, Register Roffset, Register Rklass); + + // common code + + void field_offset_at(int n, Register tmp, Register dest, Register base); + int field_offset_at(Register object, address bcp, int offset); + void fast_iaaccess(int n, address bcp); + void fast_iaputfield(address bcp, bool do_store_check ); + + void index_check(Register array, Register index, int index_shift, Register tmp, Register res); + void index_check_without_pop(Register array, Register index, int index_shift, Register tmp, Register res); + + void get_const(Register Rdst); + void get_constant_pool(Register Rdst); + void get_constant_pool_cache(Register Rdst); + void get_cpool_and_tags(Register Rcpool, Register Rtags); + void is_a(Label& L); + + // Load compiled (i2c) or interpreter entry and call from interpreted + void call_from_interpreter(Register target, Register scratch, Register Rret); + + // -------------------------------------------------- + + void unlock_if_synchronized_method(TosState state, bool throw_monitor_exception = true, bool install_monitor_exception = true); + + void add_monitor_to_stack( bool stack_is_empty, + Register Rtemp, + Register Rtemp2 ); + + // Load/store aligned in _LP64 but unaligned otherwise + // These only apply to the Interpreter expression stack and locals! 
+ void load_unaligned_double(Register r1, int offset, FloatRegister d); + void store_unaligned_double(FloatRegister d, Register r1, int offset ); + + // Load/store aligned in _LP64 but unaligned otherwise + void load_unaligned_long(Register r1, int offset, Register d); + void store_unaligned_long(Register d, Register r1, int offset ); + + void access_local_int( Register index, Register dst ); + void access_local_ptr( Register index, Register dst ); + void access_local_returnAddress( Register index, Register dst ); + void access_local_long( Register index, Register dst ); + void access_local_float( Register index, FloatRegister dst ); + void access_local_double( Register index, FloatRegister dst ); +#ifdef ASSERT + void check_for_regarea_stomp( Register Rindex, int offset, Register Rlimit, Register Rscratch, Register Rscratch1); +#endif // ASSERT + void store_local_int( Register index, Register src ); + void store_local_ptr( Register index, Register src ); + void store_local_ptr( int n, Register src ); + void store_local_long( Register index, Register src ); + void store_local_float( Register index, FloatRegister src ); + void store_local_double( Register index, FloatRegister src ); + + // Helpers for swap and dup + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Helper for getting receiver in register. 
+ void load_receiver(Register param_count, Register recv); + + static int top_most_monitor_byte_offset(); // offset in bytes to top of monitor block + Address top_most_monitor(); + void compute_stack_base( Register Rdest ); + + void get_method_counters(Register method, Register Rcounters, Label& skip); + void increment_invocation_counter( Register Rcounters, Register Rtmp, Register Rtmp2 ); + void increment_backedge_counter( Register Rcounters, Register Rtmp, Register Rtmp2 ); + + // Object locking + void lock_object (Register lock_reg, Register obj_reg); + void unlock_object(Register lock_reg); + + // Interpreter profiling operations + void set_method_data_pointer(); + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(int constant, Register value); + void increment_mdp_data_at(Address counter, Register bumped_count, + bool decrement = false); + void increment_mdp_data_at(int constant, Register bumped_count, + bool decrement = false); + void increment_mdp_data_at(Register reg, int constant, + Register bumped_count, Register scratch2, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, Address mask_addr, + Register scratch1, Register scratch2, + Condition cond, Label *where); + void set_mdp_flag_at(int flag_constant, Register scratch); + void test_mdp_data_at(int offset, Register value, Label& not_equal_continue, + Register scratch); + + void record_klass_in_profile(Register receiver, Register scratch, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register scratch, + Label& done, bool is_virtual_call); + void record_item_in_profile_helper(Register item, + Register scratch, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); + + void update_mdp_by_offset(int offset_of_disp, Register scratch); + 
void update_mdp_by_offset(Register reg, int offset_of_disp, + Register scratch); + void update_mdp_by_constant(int constant); + void update_mdp_for_ret(TosState state, Register return_bci); + + void profile_taken_branch(Register scratch, Register bumped_count); + void profile_not_taken_branch(Register scratch); + void profile_call(Register scratch); + void profile_final_call(Register scratch); + void profile_virtual_call(Register receiver, Register scratch, bool receiver_can_be_null = false); + void profile_ret(TosState state, Register return_bci, Register scratch); + void profile_null_seen(Register scratch); + void profile_typecheck(Register klass, Register scratch); + void profile_typecheck_failed(Register scratch); + void profile_switch_default(Register scratch); + void profile_switch_case(Register index, + Register scratch1, + Register scratch2, + Register scratch3); + + void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); + void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual); + void profile_return_type(Register ret, Register tmp1, Register tmp2); + void profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4); + + // Debugging + void interp_verify_oop(Register reg, TosState state, const char * file, int line); // only if +VerifyOops && state == atos + void verify_oop_or_return_address(Register reg, Register rtmp); // for astore + void verify_FPU(int stack_depth, TosState state = ftos) {} // No-op. 
+ + // support for JVMTI/Dtrace + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + void notify_method_entry(); + void notify_method_exit( + bool save_result, TosState state, NotifyMethodExitMode mode); + + // Save/restore the method result around a runtime call: a native call uses + // the d_tmp/l_tmp slots, anything else uses push(state)/pop(state). + void save_return_value(TosState state, bool is_native_call); + void restore_return_value(TosState state, bool is_native_call); + +}; + +#endif // CPU_SPARC_INTERP_MASM_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/interpreterRT_sparc.cpp b/src/hotspot/cpu/sparc/interpreterRT_sparc.cpp --- a/src/hotspot/cpu/sparc/interpreterRT_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/interpreterRT_sparc.cpp 2023-04-16 11:42:11.065190896 +0000 @@ -0,0 +1,206 @@ +/* + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/signature.hpp" + + +#define __ _masm-> + + +// Implementation of SignatureHandlerGenerator +InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( + const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); +} + +// Loads the local at offset() from Llocals into O0 and stores it as JNI +// argument jni_offset() + offset_in_arg. size_of_arg is not used here. +void InterpreterRuntime::SignatureHandlerGenerator::pass_word(int size_of_arg, int offset_in_arg) { + Argument jni_arg(jni_offset() + offset_in_arg, false); + Register Rtmp = O0; + __ ld(Llocals, Interpreter::local_offset_in_bytes(offset()), Rtmp); + + __ store_argument(Rtmp, jni_arg); +} + +// Loads a long with one ldx from local slot offset() + 1 and stores it as a +// single JNI argument. +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + Argument jni_arg(jni_offset(), false); + Register Rtmp = O0; + + __ ldx(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp); + __ store_long_argument(Rtmp, jni_arg); +} + + +// Loads a single-precision value from local slot offset() into F0 and stores +// it as the JNI float argument. +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + Argument jni_arg(jni_offset(), false); + FloatRegister Rtmp = F0; + __ ldf(FloatRegisterImpl::S, Llocals, Interpreter::local_offset_in_bytes(offset()), Rtmp); + __ store_float_argument(Rtmp, jni_arg); +} + + +// Loads a double-precision value from local slot offset() + 1 into F0 and +// stores it as the JNI double argument. +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + Argument jni_arg(jni_offset(), false); + FloatRegister Rtmp = F0; + __ ldf(FloatRegisterImpl::D, Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp); + __ store_double_argument(Rtmp, jni_arg); +} + +// Passes an object as a handle: the address of its local slot, or 0 when the +// slot holds null. The null test is skipped for a receiver. +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + Argument jni_arg(jni_offset(), false); + Argument java_arg( offset(), true); + 
Register Rtmp1 = O0; + Register Rtmp2 = jni_arg.is_register() ? jni_arg.as_register() : O0; + Register Rtmp3 = G3_scratch; + + // the handle for a receiver will never be null + bool do_NULL_check = offset() != 0 || is_static(); + + Address h_arg = Address(Llocals, Interpreter::local_offset_in_bytes(offset())); + __ ld_ptr(h_arg, Rtmp1); + if (!do_NULL_check) { + __ add(h_arg.base(), h_arg.disp(), Rtmp2); + } else { + if (Rtmp1 == Rtmp2) + __ tst(Rtmp1); + else __ addcc(G0, Rtmp1, Rtmp2); // optimize mov/test pair + Label L; + // the slot address is only formed in the delay slot of the notZero branch + __ brx(Assembler::notZero, true, Assembler::pt, L); + __ delayed()->add(h_arg.base(), h_arg.disp(), Rtmp2); + __ bind(L); + } + __ store_ptr_argument(Rtmp2, jni_arg); // this is often a no-op +} + + +// Emits the complete handler: the argument moves selected by fingerprint, then +// a return that leaves the result handler address in Lscratch. +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + + // generate code to handle arguments + iterate(fingerprint); + + // return result handler + AddressLiteral result_handler(Interpreter::result_handler(method()->result_type())); + __ sethi(result_handler, Lscratch); + __ retl(); + __ delayed()->add(Lscratch, result_handler.low10(), Lscratch); + + __ flush(); +} + + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +// Copies native-call arguments from the interpreter locals (_from) to the +// outgoing area (_to) and records a 2-bit type tag per argument in +// *_RegArgSignature. +class SlowSignatureHandler: public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _RegArgSignature; // Signature of first Arguments to be passed in Registers + uint _argcount; + + enum { // We need to differentiate float from non floats in reg args + non_float = 0, + float_sig = 1, + double_sig = 2, + long_sig = 3 + }; + + virtual void pass_int() { + *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + add_signature( non_float ); + } + + virtual void pass_object() { + // pass the address of the local slot as the handle, or 0 if the slot holds null + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + *_to++ = (*from_addr == 0) ? 
(intptr_t) 0 : (intptr_t) from_addr; + _from -= Interpreter::stackElementSize; + add_signature( non_float ); + } + + virtual void pass_float() { + *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + add_signature( float_sig ); + } + + virtual void pass_double() { + *_to++ = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + add_signature( double_sig ); + } + + virtual void pass_long() { + _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _to += 1; + _from -= 2*Interpreter::stackElementSize; + add_signature( long_sig ); + } + + // Two bits per argument: only the first sizeof(intptr_t)*4 argument positions + // fit in the signature word, later ones are not recorded. + virtual void add_signature( intptr_t sig_type ) { + if ( _argcount < (sizeof (intptr_t))*4 ) { + *_RegArgSignature |= (sig_type << (_argcount*2) ); + _argcount++; + } + } + + + public: + // Clears *RegArgSig and starts counting at position 2 for static methods, 1 otherwise. + SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to, intptr_t *RegArgSig) : NativeSignatureIterator(method) { + _from = from; + _to = to; + _RegArgSignature = RegArgSig; + *_RegArgSignature = 0; + _argcount = method->is_static() ? 2 : 1; + } +}; + + +JRT_ENTRY(address, InterpreterRuntime::slow_signature_handler( + JavaThread* current, + Method* method, + intptr_t* from, + intptr_t* to )) + methodHandle m(current, method); + assert(m->is_native(), "sanity check"); + // handle arguments + // Warning: We use reg arg slot 00 temporarily to return the RegArgSignature + // back to the code that pops the arguments into the CPU registers + SlowSignatureHandler(m, (address)from, m->is_static() ? 
to+2 : to+1, to).iterate((uint64_t)CONST64(-1)); + // return result handler + return Interpreter::result_handler(m->result_type()); +JRT_END diff -ur --new-file a/src/hotspot/cpu/sparc/interpreterRT_sparc.hpp b/src/hotspot/cpu/sparc/interpreterRT_sparc.hpp --- a/src/hotspot/cpu/sparc/interpreterRT_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/interpreterRT_sparc.hpp 2023-04-16 11:42:11.065305807 +0000 @@ -0,0 +1,63 @@ +/* + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_INTERPRETERRT_SPARC_HPP +#define CPU_SPARC_INTERPRETERRT_SPARC_HPP + +// This is included in the middle of class InterpreterRuntime. +// Do not include files here. 
+ + +static int binary_search(int key, LookupswitchPair* array, int n); + +static address iload (JavaThread* thread); +static address aload (JavaThread* thread); +static address istore(JavaThread* thread); +static address astore(JavaThread* thread); +static address iinc (JavaThread* thread); + + + +// native method calls + +// Signature iterator that emits, through _masm, the code passing each argument +// of a native method; generate() drives it for a given fingerprint. +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + + void pass_word(int size_of_arg, int offset_in_arg); + // an int is passed as a single word at offset 0 + void pass_int() { pass_word(1, 0); } + void pass_long(); + void pass_double(); + void pass_float(); + void pass_object(); + + public: + // Creation + SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + + // Code generation + void generate( uint64_t fingerprint ); +}; + +#endif // CPU_SPARC_INTERPRETERRT_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/javaFrameAnchor_sparc.hpp b/src/hotspot/cpu/sparc/javaFrameAnchor_sparc.hpp --- a/src/hotspot/cpu/sparc/javaFrameAnchor_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/javaFrameAnchor_sparc.hpp 2023-04-16 11:42:11.065450365 +0000 @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_JAVAFRAMEANCHOR_SPARC_HPP +#define CPU_SPARC_JAVAFRAMEANCHOR_SPARC_HPP + +private: + volatile int _flags; // bit set of pd_Constants values + +public: + + enum pd_Constants { + flushed = 1 // windows have flushed + }; + + int flags(void) { return _flags; } + void set_flags(int flags) { _flags = flags; } + + static ByteSize flags_offset() { return byte_offset_of(JavaFrameAnchor, _flags); } + + // Each arch must define clear, copy + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? 
+ _flags = 0; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) + _last_Java_sp = NULL; + + _flags = src->_flags; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + // Is stack walkable + inline bool walkable( void) { + return _flags & flushed; + } + + void make_walkable(JavaThread* thread); + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + + intptr_t* last_Java_sp() const { + // _last_Java_sp will always be an unbiased stack pointer; + // if it is biased then some setter screwed up. This is + // deadly. + assert(((intptr_t)_last_Java_sp & 0xF) == 0, "Biased last_Java_sp"); + return _last_Java_sp; + } + + // These are only used by friends +private: + + void capture_last_Java_pc(intptr_t* sp); + + // Marks the windows as flushed, then issues a full fence. + void set_window_flushed( void) { + _flags |= flushed; + OrderAccess::fence(); + } + +#endif // CPU_SPARC_JAVAFRAMEANCHOR_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/jniFastGetField_sparc.cpp b/src/hotspot/cpu/sparc/jniFastGetField_sparc.cpp --- a/src/hotspot/cpu/sparc/jniFastGetField_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/jniFastGetField_sparc.cpp 2023-04-16 11:42:11.065637371 +0000 @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2004, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "prims/jvmtiExport.hpp" +#include "runtime/safepoint.hpp" + +// TSO ensures that loads are blocking and ordered with respect to +// earlier loads, so we don't need LoadLoad membars. 
+ +#define __ masm-> + +// Parenthesized so the macro expands as one operand in any expression. +#define BUFFER_SIZE (30*sizeof(jint)) + +// Common register usage: +// O0: env +// O1: obj +// O2: jfieldID +// O4: offset (O2 >> 2) +// G4: old safepoint counter + +// Generates the fast-path stub for the integral Get<Type>Field accessors. +// The stub reads the safepoint counter, speculatively loads the field and +// re-reads the counter. It falls back to the regular JNI accessor, with the +// caller's return address restored in O7, when the counter is odd or changed, +// when a field access watch is set, or when resolving the jobject bails out. +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label label1, label2; + + AddressLiteral cnt_addrlit(SafepointSynchronize::safepoint_counter_addr()); + __ sethi (cnt_addrlit, O3); + Address cnt_addr(O3, cnt_addrlit.low10()); + __ ld (cnt_addr, G4); + __ andcc (G4, 1, G0); + __ br (Assembler::notZero, false, Assembler::pn, label1); + __ delayed()->srl (O2, 2, O4); + + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the fast path. + AddressLiteral get_field_access_count_addr(JvmtiExport::get_field_access_count_addr()); + __ load_contents(get_field_access_count_addr, O5); + __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pn, label1); + } + + __ mov(O1, O5); + + // Both O5 and G3 are clobbered by try_resolve_jobject_in_native. 
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, /* jni_env */ O0, /* obj */ O5, /* tmp */ G3, label1); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ ldub (O5, O4, G3); break; + case T_BYTE: __ ldsb (O5, O4, G3); break; + case T_CHAR: __ lduh (O5, O4, G3); break; + case T_SHORT: __ ldsh (O5, O4, G3); break; + case T_INT: __ ld (O5, O4, G3); break; + default: ShouldNotReachHere(); + } + + // re-read the counter; a change means the speculative load cannot be trusted + __ ld (cnt_addr, O5); + __ cmp (O5, G4); + __ br (Assembler::notEqual, false, Assembler::pn, label2); + __ delayed()->mov (O7, G1); + __ retl (); + __ delayed()->mov (G3, O0); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (label1); + __ mov (O7, G1); + + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + default: ShouldNotReachHere(); + } + __ bind (label2); + __ call (slow_case_addr, relocInfo::none); + __ delayed()->mov (G1, O7); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +// Same protocol as generate_fast_get_int_field0 for a long field: the field +// address is formed in O5 and the value is read with one ldx. +address JNI_FastGetField::generate_fast_get_long_field() { + 
+ const char *name = "jni_fast_GetLongField"; + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label label1, label2; + + AddressLiteral cnt_addrlit(SafepointSynchronize::safepoint_counter_addr()); + __ sethi (cnt_addrlit, G3); + Address cnt_addr(G3, cnt_addrlit.low10()); + __ ld (cnt_addr, G4); + __ andcc (G4, 1, G0); + __ br (Assembler::notZero, false, Assembler::pn, label1); + __ delayed()->srl (O2, 2, O4); + + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the fast path. + AddressLiteral get_field_access_count_addr(JvmtiExport::get_field_access_count_addr()); + __ load_contents(get_field_access_count_addr, O5); + __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pn, label1); + } + + __ mov(O1, O5); + + // Both O5 and G1 are clobbered by try_resolve_jobject_in_native. 
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, /* jni_env */ O0, /* obj */ O5, /* tmp */ G1, label1); + DEBUG_ONLY(__ set(0xDEADC0DE, G1);) + + __ add (O5, O4, O5); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + __ ldx (O5, 0, O3); + + __ ld (cnt_addr, G1); + __ cmp (G1, G4); + __ br (Assembler::notEqual, false, Assembler::pn, label2); + __ delayed()->mov (O7, G1); + + __ retl (); + __ delayed()->mov (O3, O0); + + slowcase_entry_pclist[count++] = __ pc(); + + __ bind (label1); + __ mov (O7, G1); + + address slow_case_addr = jni_GetLongField_addr(); + __ bind (label2); + __ call (slow_case_addr, relocInfo::none); + __ delayed()->mov (G1, O7); + + __ flush (); + + return fast_entry; +} + +// Same protocol for float and double fields. The value is loaded straight into +// F0, so the fast return has a nop in its delay slot. +address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label label1, label2; + + AddressLiteral cnt_addrlit(SafepointSynchronize::safepoint_counter_addr()); + __ sethi (cnt_addrlit, O3); + Address cnt_addr(O3, cnt_addrlit.low10()); + __ ld (cnt_addr, G4); + __ andcc (G4, 1, G0); + __ br (Assembler::notZero, false, Assembler::pn, label1); + __ delayed()->srl (O2, 2, O4); + + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the fast path. 
+ AddressLiteral get_field_access_count_addr(JvmtiExport::get_field_access_count_addr()); + __ load_contents(get_field_access_count_addr, O5); + __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pn, label1); + } + + __ mov(O1, O5); + + // Both O5 and G3 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, /* jni_env */ O0, /* obj */ O5, /* tmp */ G3, label1); + DEBUG_ONLY(__ set(0xDEADC0DE, G3);) + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_FLOAT: __ ldf (FloatRegisterImpl::S, O5, O4, F0); break; + case T_DOUBLE: __ ldf (FloatRegisterImpl::D, O5, O4, F0); break; + default: ShouldNotReachHere(); + } + + __ ld (cnt_addr, O5); + __ cmp (O5, G4); + __ br (Assembler::notEqual, false, Assembler::pn, label2); + __ delayed()->mov (O7, G1); + + __ retl (); + __ delayed()-> nop (); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (label1); + __ mov (O7, G1); + + address slow_case_addr = NULL; + switch (type) { + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + __ bind (label2); + __ call (slow_case_addr, relocInfo::none); + __ delayed()->mov (G1, O7); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_float_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_float_field0(T_DOUBLE); +} diff -ur --new-file a/src/hotspot/cpu/sparc/jniTypes_sparc.hpp b/src/hotspot/cpu/sparc/jniTypes_sparc.hpp --- a/src/hotspot/cpu/sparc/jniTypes_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/jniTypes_sparc.hpp 2023-04-16 11:42:11.065783209 +0000 @@ -0,0 +1,90 @@ +/* + * Copyright (c) 1998, 2019, 
Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_JNITYPES_SPARC_HPP +#define CPU_SPARC_JNITYPES_SPARC_HPP + +#include "jni.h" +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + // Helper routines. 
+ static inline void put_int2 (jint *from, jint *to) { to[0] = from[0]; to[1] = from[1]; } + static inline void put_int2 (jint *from, jint *to, int& pos) { put_int2 (from, (jint *)((intptr_t *)to + pos)); pos += 2; } + static inline void put_int2r(jint *from, jint *to) { to[0] = from[1]; to[1] = from[0]; } + static inline void put_int2r(jint *from, jint *to, int& pos) { put_int2r(from, (jint *)((intptr_t *)to + pos)); pos += 2; } + +public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at *(to+1). + static inline void put_long(jlong from, intptr_t *to) { *(jlong *)(to + 1 + 0) = from; } + static inline void put_long(jlong from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = from; pos += 2; } + static inline void put_long(jlong *from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = *from; pos += 2; } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } + static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. 
+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + + // Doubles are stored in native word format in one JavaCallArgument slot at *(to+1). + static inline void put_double(jdouble from, intptr_t *to) { *(jdouble *)(to + 1 + 0) = from; } + static inline void put_double(jdouble from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = from; pos += 2; } + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = *from; pos += 2; } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + static inline jint get_int(intptr_t *from) { return *(jint *)from; } + + static inline jlong get_long(intptr_t *from) { return *(jlong *)from; } + + static inline oop get_obj(intptr_t *from) { return *(oop *)from; } + static inline jfloat get_float(intptr_t *from) { return *(jfloat *)from; } + + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)from; } + +}; + +#endif // CPU_SPARC_JNITYPES_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/jvmciCodeInstaller_sparc.cpp b/src/hotspot/cpu/sparc/jvmciCodeInstaller_sparc.cpp --- a/src/hotspot/cpu/sparc/jvmciCodeInstaller_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/jvmciCodeInstaller_sparc.cpp 2023-04-16 11:42:11.065956085 +0000 @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/align.hpp" +#include "vmreg_sparc.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMCIObject method, JVMCI_TRAPS) { + if (inst->is_call() || inst->is_jump()) { + return pc_offset + NativeCall::instruction_size; + } else if (inst->is_call_reg()) { + return pc_offset + NativeCallReg::instruction_size; + } else if (inst->is_sethi()) { + return pc_offset + NativeFarCall::instruction_size; + } else { + JVMCI_ERROR_0("unsupported type of instruction for call site"); + return 0; + } +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, JVMCIObject constant, JVMCI_TRAPS) { + address pc = _instructions->start() + pc_offset; + Handle obj = jvmci_env()->asConstant(constant, JVMCI_CHECK); + jobject value = JNIHandles::make_local(obj()); + if (jvmci_env()->get_HotSpotObjectConstantImpl_compressed(constant)) { + int oop_index = _oop_recorder->find_index(value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + 
_instructions->relocate(pc, rspec, 1); + } else { + NativeMovConstReg* move = nativeMovConstReg_at(pc); + move->set_data((intptr_t) value); + + // We need two relocations: one on the sethi and one on the add. + int oop_index = _oop_recorder->find_index(value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + _instructions->relocate(pc + NativeMovConstReg::sethi_offset, rspec); + _instructions->relocate(pc + NativeMovConstReg::add_offset, rspec); + } +} + +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, JVMCIObject constant, JVMCI_TRAPS) { + address pc = _instructions->start() + pc_offset; + if (jvmci_env()->get_HotSpotMetaspaceConstantImpl_compressed(constant)) { + NativeMovConstReg32* move = nativeMovConstReg32_at(pc); + narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, JVMCI_CHECK); + move->set_data((intptr_t)narrowOop); + TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); + } else { + NativeMovConstReg* move = nativeMovConstReg_at(pc); + void* reference = record_metadata_reference(_instructions, pc, constant, JVMCI_CHECK); + move->set_data((intptr_t)reference); + TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); + } +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, JVMCI_TRAPS) { + address pc = _instructions->start() + pc_offset; + NativeInstruction* inst = nativeInstruction_at(pc); + NativeInstruction* inst1 = nativeInstruction_at(pc + 4); + if(inst->is_sethi() && inst1->is_nop()) { + address const_start = _constants->start(); + address dest = _constants->start() + data_offset; + if(_constants_size > 0) { + _instructions->relocate(pc + NativeMovConstReg::sethi_offset, internal_word_Relocation::spec((address) dest)); + _instructions->relocate(pc + NativeMovConstReg::add_offset, internal_word_Relocation::spec((address) dest)); + } + TRACE_jvmci_3("relocating at 
" PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); + }else { + int const_size = align_up(_constants->end()-_constants->start(), CodeEntryAlignment); + NativeMovRegMem* load = nativeMovRegMem_at(pc); + // This offset must match with SPARCLoadConstantTableBaseOp.emitCode + // -4096 was Assembler::min_simm13() + load->set_offset(- (const_size - data_offset - 4096)); + TRACE_jvmci_3("relocating ld at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); + } +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, JVMCI_TRAPS) { + address pc = (address) inst; + if (inst->is_call()) { + NativeCall* call = nativeCall_at(pc); + call->set_destination((address) foreign_call_destination); + _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); + } else if (inst->is_sethi()) { + NativeJump* jump = nativeJump_at(pc); + jump->set_jump_destination((address) foreign_call_destination); + _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); + } else { + JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); + } + TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); +} + +void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &, JVMCIObject hotspot_method, jint pc_offset, JVMCI_TRAPS) { +#ifdef ASSERT + Method* method = NULL; + // we need to check, this might also be an unresolved method + if (JVMCIENV->isa_HotSpotResolvedJavaMethodImpl(hotspot_method)) { + method = JVMCIENV->asMethod(hotspot_method); + } +#endif + switch (_next_call_type) { + case INLINE_INVOKE: + break; + case INVOKEVIRTUAL: + case INVOKEINTERFACE: { + assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call->set_destination(SharedRuntime::get_resolve_virtual_call_stub()); + 
_instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc)); + break; + } + case INVOKESTATIC: { + assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call->set_destination(SharedRuntime::get_resolve_static_call_stub()); + _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); + break; + } + case INVOKESPECIAL: { + assert(method == NULL || !method->is_static(), "cannot call static method with invokespecial"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub()); + _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); + break; + } + default: + JVMCI_ERROR("invalid _next_call_type value"); + break; + } +} + +void CodeInstaller::pd_relocate_poll(address pc, jint mark, JVMCI_TRAPS) { + switch (mark) { + case POLL_NEAR: + JVMCI_ERROR("unimplemented"); + break; + case POLL_FAR: + _instructions->relocate(pc, relocInfo::poll_type); + break; + case POLL_RETURN_NEAR: + JVMCI_ERROR("unimplemented"); + break; + case POLL_RETURN_FAR: + _instructions->relocate(pc, relocInfo::poll_return_type); + break; + default: + JVMCI_ERROR("invalid mark value"); + break; + } +} + +// convert JVMCI register indices (as used in oop maps) to HotSpot registers +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, JVMCI_TRAPS) { + // JVMCI Registers are numbered as follows: + // 0..31: Thirty-two General Purpose registers (CPU Registers) + // 32..63: Thirty-two single precision float registers + // 64..95: Thirty-two double precision float registers + // 96..111: Sixteen quad precision float registers + if (jvmci_reg < 32) { + return as_Register(jvmci_reg)->as_VMReg(); + } else { + jint floatRegisterNumber; + if(jvmci_reg < 64) { // Single precision + floatRegisterNumber = jvmci_reg - 
32; + } else if(jvmci_reg < 96) { + floatRegisterNumber = 2 * (jvmci_reg - 64); + } else if(jvmci_reg < 112) { + floatRegisterNumber = 4 * (jvmci_reg - 96); + } else { + JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); + } + return as_FloatRegister(floatRegisterNumber)->as_VMReg(); + } +} + +bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { + return !hotspotRegister->is_FloatRegister(); +} diff -ur --new-file a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp --- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp 2023-04-16 11:42:11.067194557 +0000 @@ -0,0 +1,3928 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/accessDecorators.hpp" +#include "oops/compressedOops.hpp" +#include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.inline.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/align.hpp" +#include "utilities/macros.hpp" +#include "utilities/powerOfTwo.hpp" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +// Convert the raw encoding form into the form expected by the +// constructor for Address. 
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
+  assert(scale == 0, "not supported");
+
+  // Start from a default holder and replace it only when the caller asks
+  // for a relocation on the displacement.
+  RelocationHolder reloc;
+  if (disp_reloc != relocInfo::none) {
+    reloc = Relocation::spec_simple(disp_reloc);
+  }
+
+  // An index register other than G0 selects the (base, index) form, in which
+  // case disp is not used; otherwise the (base, disp) form is built.
+  const Register index_reg = as_Register(index);
+  Address result = (index_reg != G0) ? Address(as_Register(base), index_reg)
+                                     : Address(as_Register(base), disp);
+  result._rspec = reloc;
+  return result;
+}
+
+Address Argument::address_in_frame() const {
+  // Warning: In LP64 mode disp will occupy more than 10 bits, but
+  //          op codes such as ld or ldx, only access disp() to get
+  //          their simm13 argument.
+  const int slot_words = _number - Argument::n_register_parameters + frame::memory_parameter_word_sp_offset;
+  const int disp = (slot_words * BytesPerWord) + STACK_BIAS;
+  // In arguments are addressed relative to FP,
+  // out arguments relative to SP.
+  return Address(is_in() ? FP : SP, disp);
+}
+
+static const char* argumentNames[][2] = {  // column 1 is used when is_in(), column 0 otherwise
+  {"A0","P0"}, {"A1","P1"}, {"A2","P2"}, {"A3","P3"}, {"A4","P4"},
+  {"A5","P5"}, {"A6","P6"}, {"A7","P7"}, {"A8","P8"}, {"A9","P9"},
+  {"A(n>9)","P(n>9)"}
+};
+
+const char* Argument::name() const {
+  const int row_count = sizeof argumentNames / sizeof argumentNames[0];
+  const int num = number();
+  const int row = (num < row_count) ? num : row_count - 1;  // clamp to the final catch-all row
+  return argumentNames[row][is_in() ? 1 : 0];
+}
+
+#ifdef ASSERT
+// On RISC, there's no benefit to verifying instruction boundaries.
+bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif
+
+// Patch instruction inst at offset inst_pos to refer to dest_pos
+// and return the resulting instruction.
+// We should have pcs, not offsets, but since all is relative, it will work out
+// OK.
+int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) {
+  const int aligned_ones = -4;  // all ones, word aligned; encoded from 0 it gives the field mask
+  int field_mask;               // mask for displacement field
+  int field_bits;               // new value for displacement field
+  switch (inv_op(inst)) {
+  default: ShouldNotReachHere();
+  case call_op:
+    field_mask = wdisp(aligned_ones, 0, 30);
+    field_bits = wdisp(dest_pos, inst_pos, 30);
+    break;
+  case branch_op:
+    switch (inv_op2(inst)) {
+    case fbp_op2:
+    case bp_op2:  field_mask = wdisp(aligned_ones, 0, 19); field_bits = wdisp(dest_pos, inst_pos, 19); break;
+    case fb_op2:
+    case br_op2:  field_mask = wdisp(aligned_ones, 0, 22); field_bits = wdisp(dest_pos, inst_pos, 22); break;
+    case bpr_op2: {
+      // cbcond instructions are decoded under bpr_op2 too, but use the 10-bit form.
+      const bool cbcond = is_cbcond(inst);
+      field_mask = cbcond ? wdisp10(aligned_ones, 0) : wdisp16(aligned_ones, 0);
+      field_bits = cbcond ? wdisp10(dest_pos, inst_pos) : wdisp16(dest_pos, inst_pos);
+      break;
+    }
+    default: ShouldNotReachHere();
+    }
+  }
+  return (inst & ~field_mask) | field_bits;
+}
+
+// Return the offset of the branch destination of instruction inst
+// at offset pos.
+// Should have pcs, but since all is relative, it works out.
+int MacroAssembler::branch_destination(int inst, int pos) {
+  int dest;  // decoded destination offset
+  switch (inv_op(inst)) {
+  default: ShouldNotReachHere();
+  case call_op:
+    dest = inv_wdisp(inst, pos, 30);
+    break;
+  case branch_op:
+    // Field widths match the encoder in patched_branch(): 19, 22, 16 or 10 bits.
+    switch (inv_op2(inst)) {
+    case fbp_op2:
+    case bp_op2:  dest = inv_wdisp(inst, pos, 19); break;
+    case fb_op2:
+    case br_op2:  dest = inv_wdisp(inst, pos, 22); break;
+    case bpr_op2:
+      // cbcond instructions are decoded under bpr_op2 as well, but they use
+      // the 10-bit displacement form instead of the 16-bit one.
+      dest = is_cbcond(inst) ? inv_wdisp10(inst, pos) : inv_wdisp16(inst, pos);
+      break;
+    default: ShouldNotReachHere();
+    }
+  }
+  return dest;
+}
+
+void MacroAssembler::resolve_jobject(Register value, Register tmp) {
+  Label not_weak, done;
+  br_null(value, false, Assembler::pn, done);              // Use NULL as-is.
+  delayed()->andcc(value, JNIHandles::weak_tag_mask, G0);  // Test for jweak
+  brx(Assembler::zero, true, Assembler::pt, not_weak);     // tag bit clear: not a jweak
+  delayed()->nop();
+  // jweak: the negative displacement removes the tag; load with phantom-reference semantics.
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, Address(value, -JNIHandles::weak_tag_value), value, tmp);
+  verify_oop(value);
+  br(Assembler::always, true, Assembler::pt, done);
+  delayed()->nop();
+  bind(not_weak);
+  access_load_at(T_OBJECT, IN_NATIVE, Address(value, 0), value, tmp);  // untagged handle: plain native load
+  verify_oop(value);
+  bind(done);
+}
+
+void MacroAssembler::null_check(Register reg, int offset) {
+  // Only offsets that need an explicit check get a probe emitted here.
+  if (!needs_explicit_null_check((intptr_t)offset)) {
+    // nothing to do, (later) access of M[reg + offset]
+    // will provoke OS NULL exception if reg = NULL
+    return;
+  }
+  // provoke OS NULL exception if reg = NULL by
+  // accessing M[reg] w/o changing any registers
+  ld_ptr(reg, 0, G0);
+}
+
+// Ring buffer jumps
+// Note: the file and line parameters are not used by either emitter below.
+
+void MacroAssembler::jmp2(Register r1, Register r2, const char* file, int line) {
+  assert_not_delayed();
+  jmpl(r1, r2, G0);
+}
+void MacroAssembler::jmp(Register r1, int offset, const char* file, int line) {
+  assert_not_delayed();
+  jmp(r1, offset);
+}
+
+// This code sequence is relocatable to any address, even on LP64.
+void MacroAssembler::jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line) { + assert_not_delayed(); + // Force fixed length sethi because NativeJump and NativeFarCall don't handle + // variable length instruction streams. + patchable_sethi(addrlit, temp); + Address a(temp, addrlit.low10() + offset); // Add the offset to the displacement. + jmpl(a.base(), a.disp(), d); +} + +void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) { + jumpl(addrlit, temp, G0, offset, file, line); +} + + +// Conditional breakpoint (for assertion checks in assembly code) +void MacroAssembler::breakpoint_trap(Condition c, CC cc) { + trap(c, cc, G0, ST_RESERVED_FOR_USER_0); +} + +// We want to use ST_BREAKPOINT here, but the debugger is confused by it. +void MacroAssembler::breakpoint_trap() { + trap(ST_RESERVED_FOR_USER_0); +} + +void MacroAssembler::safepoint_poll(Label& slow_path, bool a, Register thread_reg, Register temp_reg) { + ldx(Address(thread_reg, JavaThread::polling_page_offset()), temp_reg, 0); + // Armed page has poll bit set. 
+  and3(temp_reg, SafepointMechanism::poll_bit(), temp_reg);
+  br_notnull(temp_reg, a, Assembler::pn, slow_path);
+}
+
+void MacroAssembler::enter() {
+  Unimplemented();
+}
+
+void MacroAssembler::leave() {
+  Unimplemented();
+}
+
+// Calls to C land
+
+#ifdef ASSERT
+// a hook for debugging
+static Thread* reinitialize_thread() {
+  return Thread::current();
+}
+// Debug builds store here the address of the call emitted by get_thread().
+address last_get_thread = NULL;
+#else
+#define reinitialize_thread Thread::current
+#endif
+
+// call this when G2_thread is not known to be valid
+// (G1, G3, G4 and G5 are parked in L registers across the C call)
+void MacroAssembler::get_thread() {
+  save_frame(0);                // to avoid clobbering O0
+  mov(G1, L0);                  // avoid clobbering G1
+  mov(G5_method, L1);           // avoid clobbering G5
+  mov(G3, L2);                  // avoid clobbering G3 also
+  mov(G4, L5);                  // avoid clobbering G4
+#ifdef ASSERT
+  AddressLiteral last_get_thread_addrlit(&last_get_thread);
+  set(last_get_thread_addrlit, L3);
+  rdpc(L4);
+  inc(L4, 3 * BytesPerInstWord); // skip rdpc + inc + st_ptr to point L4 at call
+  st_ptr(L4, L3, 0);             // must be emitted: the skip above counts it, and it records L4
+#endif
+  call(CAST_FROM_FN_PTR(address, reinitialize_thread), relocInfo::runtime_call_type);
+  delayed()->nop();
+  mov(L0, G1);
+  mov(L1, G5_method);
+  mov(L2, G3);
+  mov(L5, G4);
+  restore(O0, 0, G2_thread);    // the call's result (O0) becomes G2_thread
+}
+
+static Thread* verify_thread_subroutine(Thread* gthread_value) {
+  Thread* correct_value = Thread::current();
+  guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
+  return correct_value;
+}
+
+void MacroAssembler::verify_thread() {
+  if (VerifyThread) {
+    // NOTE: this chops off the heads of the 64-bit O registers.
+ // make sure G2_thread contains the right value + save_frame_and_mov(0, Lmethod, Lmethod); // to avoid clobbering O0 (and propagate Lmethod) + mov(G1, L1); // avoid clobbering G1 + // G2 saved below + mov(G3, L3); // avoid clobbering G3 + mov(G4, L4); // avoid clobbering G4 + mov(G5_method, L5); // avoid clobbering G5_method + call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type); + delayed()->mov(G2_thread, O0); + + mov(L1, G1); // Restore G1 + // G2 restored below + mov(L3, G3); // restore G3 + mov(L4, G4); // restore G4 + mov(L5, G5_method); // restore G5_method + restore(O0, 0, G2_thread); + } +} + + +void MacroAssembler::save_thread(const Register thread_cache) { + verify_thread(); + if (thread_cache->is_valid()) { + assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile"); + mov(G2_thread, thread_cache); + } + if (VerifyThread) { + // smash G2_thread, as if the VM were about to anyway + set(0x67676767, G2_thread); + } +} + + +void MacroAssembler::restore_thread(const Register thread_cache) { + if (thread_cache->is_valid()) { + assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile"); + mov(thread_cache, G2_thread); + verify_thread(); + } else { + // do it the slow way + get_thread(); + } +} + + +// %%% maybe get rid of [re]set_last_Java_frame +void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) { + assert_not_delayed(); + Address flags(G2_thread, JavaThread::frame_anchor_offset() + + JavaFrameAnchor::flags_offset()); + Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset()); + + // Always set last_Java_pc and flags first because once last_Java_sp is visible + // has_last_Java_frame is true and users will look at the rest of the fields. + // (Note: flags should always be zero before we get here so doesn't need to be set.) 
+ +#ifdef ASSERT + // Verify that flags was zeroed on return to Java + Label PcOk; + save_frame(0); // to avoid clobbering O0 + ld_ptr(pc_addr, L0); + br_null_short(L0, Assembler::pt, PcOk); + STOP("last_Java_pc not zeroed before leaving Java"); + bind(PcOk); + + // Verify that flags was zeroed on return to Java + Label FlagsOk; + ld(flags, L0); + tst(L0); + br(Assembler::zero, false, Assembler::pt, FlagsOk); + delayed() -> restore(); + STOP("flags not zeroed before leaving Java"); + bind(FlagsOk); +#endif /* ASSERT */ + // + // When returning from calling out from Java mode the frame anchor's last_Java_pc + // will always be set to NULL. It is set here so that if we are doing a call to + // native (not VM) that we capture the known pc and don't have to rely on the + // native call having a standard frame linkage where we can find the pc. + + if (last_Java_pc->is_valid()) { + st_ptr(last_Java_pc, pc_addr); + } + +#ifdef ASSERT + // Make sure that we have an odd stack + Label StackOk; + andcc(last_java_sp, 0x01, G0); + br(Assembler::notZero, false, Assembler::pt, StackOk); + delayed()->nop(); + STOP("Stack Not Biased in set_last_Java_frame"); + bind(StackOk); +#endif // ASSERT + assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame"); + add( last_java_sp, STACK_BIAS, G4_scratch ); + st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset()); +} + +void MacroAssembler::reset_last_Java_frame(void) { + assert_not_delayed(); + + Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset()); + Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); + Address flags (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); + +#ifdef ASSERT + // check that it WAS previously set + save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod to helper frame + ld_ptr(sp_addr, L0); + tst(L0); + breakpoint_trap(Assembler::zero, Assembler::ptr_cc); + restore(); +#endif // 
ASSERT + + st_ptr(G0, sp_addr); + // Always return last_Java_pc to zero + st_ptr(G0, pc_addr); + // Always null flags after return to Java + st(G0, flags); +} + + +void MacroAssembler::call_VM_base( + Register oop_result, + Register thread_cache, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) +{ + assert_not_delayed(); + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + + // 64-bit last_java_sp is biased! + set_last_Java_frame(last_java_sp, noreg); + if (VerifyThread) mov(G2_thread, O0); // about to be smashed; pass early + save_thread(thread_cache); + // do the call + call(entry_point, relocInfo::runtime_call_type); + if (!VerifyThread) + delayed()->mov(G2_thread, O0); // pass thread as first argument + else + delayed()->nop(); // (thread already passed) + restore_thread(thread_cache); + reset_last_Java_frame(); + + // check for pending exceptions. use Gtemp as scratch register. 
+ if (check_exceptions) { + check_and_forward_exception(Gtemp); + } + +#ifdef ASSERT + set(badHeapWordVal, G3); + set(badHeapWordVal, G4); + set(badHeapWordVal, G5); +#endif + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result); + } +} + +void MacroAssembler::check_and_forward_exception(Register scratch_reg) +{ + Label L; + + check_and_handle_popframe(scratch_reg); + check_and_handle_earlyret(scratch_reg); + + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + ld_ptr(exception_addr, scratch_reg); + br_null_short(scratch_reg, pt, L); + // we use O7 linkage so that forward_exception_entry has the issuing PC + call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + delayed()->nop(); + bind(L); +} + + +void MacroAssembler::check_and_handle_popframe(Register scratch_reg) { +} + + +void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) { +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + call_VM(oop_result, entry_point, 1, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); + call_VM(oop_result, entry_point, 2, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed 
argument"); + mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument"); + call_VM(oop_result, entry_point, 3, check_exceptions); +} + + + +// Note: The following call_VM overloadings are useful when a "save" +// has already been performed by a stub, and the last Java frame is +// the previous one. In that case, last_java_sp must be passed as FP +// instead of SP. + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { + call_VM_base(oop_result, noreg, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); + mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + + + +void MacroAssembler::call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments) { + assert_not_delayed(); + save_thread(thread_cache); + // do the call + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + restore_thread(thread_cache); +#ifdef ASSERT + 
set(badHeapWordVal, G3); + set(badHeapWordVal, G4); + set(badHeapWordVal, G5); +#endif +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments) { + call_VM_leaf_base(thread_cache, entry_point, number_of_arguments); +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1) { + mov(arg_1, O0); + call_VM_leaf(thread_cache, entry_point, 1); +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2) { + mov(arg_1, O0); + mov(arg_2, O1); assert(arg_2 != O0, "smashed argument"); + call_VM_leaf(thread_cache, entry_point, 2); +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3) { + mov(arg_1, O0); + mov(arg_2, O1); assert(arg_2 != O0, "smashed argument"); + mov(arg_3, O2); assert(arg_3 != O0 && arg_3 != O1, "smashed argument"); + call_VM_leaf(thread_cache, entry_point, 3); +} + + +void MacroAssembler::get_vm_result(Register oop_result) { + verify_thread(); + Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); + ld_ptr( vm_result_addr, oop_result); + st_ptr(G0, vm_result_addr); + verify_oop(oop_result); +} + + +void MacroAssembler::get_vm_result_2(Register metadata_result) { + verify_thread(); + Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset()); + ld_ptr(vm_result_addr_2, metadata_result); + st_ptr(G0, vm_result_addr_2); +} + + +// We require that C code which does not return a value in vm_result will +// leave it undisturbed. +void MacroAssembler::set_vm_result(Register oop_result) { + verify_thread(); + Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); + verify_oop(oop_result); + +# ifdef ASSERT + // Check that we are not overwriting any other oop. 
+ save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod + ld_ptr(vm_result_addr, L0); + tst(L0); + restore(); + breakpoint_trap(notZero, Assembler::ptr_cc); + // } +# endif + + st_ptr(oop_result, vm_result_addr); +} + + +void MacroAssembler::ic_call(address entry, bool emit_delay, jint method_index) { + RelocationHolder rspec = virtual_call_Relocation::spec(pc(), method_index); + patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg); + relocate(rspec); + call(entry, relocInfo::none); + if (emit_delay) { + delayed()->nop(); + } +} + + +void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) { + address save_pc; + int shiftcnt; +#ifdef VALIDATE_PIPELINE + assert_no_delay("Cannot put two instructions in delay-slot."); +#endif + v9_dep(); + save_pc = pc(); + + int msb32 = (int) (addrlit.value() >> 32); + int lsb32 = (int) (addrlit.value()); + + if (msb32 == 0 && lsb32 >= 0) { + Assembler::sethi(lsb32, d, addrlit.rspec()); + } + else if (msb32 == -1) { + Assembler::sethi(~lsb32, d, addrlit.rspec()); + xor3(d, ~low10(~0), d); + } + else { + Assembler::sethi(msb32, d, addrlit.rspec()); // msb 22-bits + if (msb32 & 0x3ff) // Any bits? + or3(d, msb32 & 0x3ff, d); // msb 32-bits are now in lsb 32 + if (lsb32 & 0xFFFFFC00) { // done? + if ((lsb32 >> 20) & 0xfff) { // Any bits set? + sllx(d, 12, d); // Make room for next 12 bits + or3(d, (lsb32 >> 20) & 0xfff, d); // Or in next 12 + shiftcnt = 0; // We already shifted + } + else + shiftcnt = 12; + if ((lsb32 >> 10) & 0x3ff) { + sllx(d, shiftcnt + 10, d); // Make room for last 10 bits + or3(d, (lsb32 >> 10) & 0x3ff, d); // Or in next 10 + shiftcnt = 0; + } + else + shiftcnt = 10; + sllx(d, shiftcnt + 10, d); // Shift leaving disp field 0'd + } + else + sllx(d, 32, d); + } + // Pad out the instruction sequence so it can be patched later. 
+ if (ForceRelocatable || (addrlit.rtype() != relocInfo::none && + addrlit.rtype() != relocInfo::runtime_call_type)) { + while (pc() < (save_pc + (7 * BytesPerInstWord))) + nop(); + } +} + + +void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) { + internal_sethi(addrlit, d, false); +} + + +void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) { + internal_sethi(addrlit, d, true); +} + + +int MacroAssembler::insts_for_sethi(address a, bool worst_case) { + if (worst_case) return 7; + intptr_t iaddr = (intptr_t) a; + int msb32 = (int) (iaddr >> 32); + int lsb32 = (int) (iaddr); + int count; + if (msb32 == 0 && lsb32 >= 0) + count = 1; + else if (msb32 == -1) + count = 2; + else { + count = 2; + if (msb32 & 0x3ff) + count++; + if (lsb32 & 0xFFFFFC00 ) { + if ((lsb32 >> 20) & 0xfff) count += 2; + if ((lsb32 >> 10) & 0x3ff) count += 2; + } + } + return count; +} + +int MacroAssembler::worst_case_insts_for_set() { + return insts_for_sethi(NULL, true) + 1; +} + + +// Keep in sync with MacroAssembler::insts_for_internal_set +void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) { + intptr_t value = addrlit.value(); + + if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) { + // can optimize + if (-4096 <= value && value <= 4095) { + or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended) + return; + } + if (inv_hi22(hi22(value)) == value) { + sethi(addrlit, d); + return; + } + } + assert_no_delay("Cannot put two instructions in delay-slot."); + internal_sethi(addrlit, d, ForceRelocatable); + if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) { + add(d, addrlit.low10(), d, addrlit.rspec()); + } +} + +// Keep in sync with MacroAssembler::internal_set +int MacroAssembler::insts_for_internal_set(intptr_t value) { + // can optimize + if (-4096 <= value && value <= 4095) { + return 1; + } + if (inv_hi22(hi22(value)) == 
value) { + return insts_for_sethi((address) value); + } + int count = insts_for_sethi((address) value); + AddressLiteral al(value); + if (al.low10() != 0) { + count++; + } + return count; +} + +void MacroAssembler::set(const AddressLiteral& al, Register d) { + internal_set(al, d, false); +} + +void MacroAssembler::set(intptr_t value, Register d) { + AddressLiteral al(value); + internal_set(al, d, false); +} + +void MacroAssembler::set(address addr, Register d, RelocationHolder const& rspec) { + AddressLiteral al(addr, rspec); + internal_set(al, d, false); +} + +void MacroAssembler::patchable_set(const AddressLiteral& al, Register d) { + internal_set(al, d, true); +} + +void MacroAssembler::patchable_set(intptr_t value, Register d) { + AddressLiteral al(value); + internal_set(al, d, true); +} + + +void MacroAssembler::set64(jlong value, Register d, Register tmp) { + assert_not_delayed(); + v9_dep(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + int bits_33to2 = (int)((value >> 2) & ~0); + // (Matcher::isSimpleConstant64 knows about the following optimizations.) 
+ if (Assembler::is_simm13(lo) && value == lo) { + or3(G0, lo, d); + } else if (hi == 0) { + Assembler::sethi(lo, d); // hardware version zero-extends to upper 32 + if (low10(lo) != 0) + or3(d, low10(lo), d); + } + else if ((hi >> 2) == 0) { + Assembler::sethi(bits_33to2, d); // hardware version zero-extends to upper 32 + sllx(d, 2, d); + if (low12(lo) != 0) + or3(d, low12(lo), d); + } + else if (hi == -1) { + Assembler::sethi(~lo, d); // hardware version zero-extends to upper 32 + xor3(d, low10(lo) ^ ~low10(~0), d); + } + else if (lo == 0) { + if (Assembler::is_simm13(hi)) { + or3(G0, hi, d); + } else { + Assembler::sethi(hi, d); // hardware version zero-extends to upper 32 + if (low10(hi) != 0) + or3(d, low10(hi), d); + } + sllx(d, 32, d); + } + else { + Assembler::sethi(hi, tmp); + Assembler::sethi(lo, d); // macro assembler version sign-extends + if (low10(hi) != 0) + or3 (tmp, low10(hi), tmp); + if (low10(lo) != 0) + or3 ( d, low10(lo), d); + sllx(tmp, 32, tmp); + or3 (d, tmp, d); + } +} + +int MacroAssembler::insts_for_set64(jlong value) { + v9_dep(); + + int hi = (int) (value >> 32); + int lo = (int) (value & ~0); + int count = 0; + + // (Matcher::isSimpleConstant64 knows about the following optimizations.) 
+ if (Assembler::is_simm13(lo) && value == lo) { + count++; + } else if (hi == 0) { + count++; + if (low10(lo) != 0) + count++; + } + else if (hi == -1) { + count += 2; + } + else if (lo == 0) { + if (Assembler::is_simm13(hi)) { + count++; + } else { + count++; + if (low10(hi) != 0) + count++; + } + count++; + } + else { + count += 2; + if (low10(hi) != 0) + count++; + if (low10(lo) != 0) + count++; + count += 2; + } + return count; +} + +// compute size in bytes of sparc frame, given +// number of extraWords +int MacroAssembler::total_frame_size_in_bytes(int extraWords) { + + int nWords = frame::memory_parameter_word_sp_offset; + + nWords += extraWords; + + if (nWords & 1) ++nWords; // round up to double-word + + return nWords * BytesPerWord; +} + + +// save_frame: given number of "extra" words in frame, +// issue approp. save instruction (p 200, v8 manual) + +void MacroAssembler::save_frame(int extraWords) { + int delta = -total_frame_size_in_bytes(extraWords); + if (is_simm13(delta)) { + save(SP, delta, SP); + } else { + set(delta, G3_scratch); + save(SP, G3_scratch, SP); + } +} + + +void MacroAssembler::save_frame_c1(int size_in_bytes) { + if (is_simm13(-size_in_bytes)) { + save(SP, -size_in_bytes, SP); + } else { + set(-size_in_bytes, G3_scratch); + save(SP, G3_scratch, SP); + } +} + + +void MacroAssembler::save_frame_and_mov(int extraWords, + Register s1, Register d1, + Register s2, Register d2) { + assert_not_delayed(); + + // The trick here is to use precisely the same memory word + // that trap handlers also use to save the register. + // This word cannot be used for any other purpose, but + // it works fine to save the register's value, whether or not + // an interrupt flushes register windows at any given moment! 
+ Address s1_addr; + if (s1->is_valid() && (s1->is_in() || s1->is_local())) { + s1_addr = s1->address_in_saved_window(); + st_ptr(s1, s1_addr); + } + + Address s2_addr; + if (s2->is_valid() && (s2->is_in() || s2->is_local())) { + s2_addr = s2->address_in_saved_window(); + st_ptr(s2, s2_addr); + } + + save_frame(extraWords); + + if (s1_addr.base() == SP) { + ld_ptr(s1_addr.after_save(), d1); + } else if (s1->is_valid()) { + mov(s1->after_save(), d1); + } + + if (s2_addr.base() == SP) { + ld_ptr(s2_addr.after_save(), d2); + } else if (s2->is_valid()) { + mov(s2->after_save(), d2); + } +} + + +AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->allocate_metadata_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + +AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->find_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + + +AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "not an oop"); + } +#endif + int oop_index = oop_recorder()->find_index(obj); + return AddressLiteral(obj, oop_Relocation::spec(oop_index)); +} + +void MacroAssembler::set_narrow_oop(jobject obj, Register d) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + assert_not_delayed(); + // Relocation with special format (see relocInfo_sparc.hpp). 
+ relocate(rspec, 1); + // Assembler::sethi(0x3fffff, d); + emit_int32( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) ); + // Don't add relocation for 'add'. Do patching during 'sethi' processing. + add(d, 0x3ff, d); + +} + +void MacroAssembler::set_narrow_klass(Klass* k, Register d) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + narrowKlass encoded_k = CompressedKlassPointers::encode(k); + + assert_not_delayed(); + // Relocation with special format (see relocInfo_sparc.hpp). + relocate(rspec, 1); + // Assembler::sethi(encoded_k, d); + emit_int32( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(encoded_k) ); + // Don't add relocation for 'add'. Do patching during 'sethi' processing. + add(d, low10(encoded_k), d); + +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + +void RegistersForDebugging::print(outputStream* s) { + FlagSetting fs(Debugging, true); + int j; + for (j = 0; j < 8; ++j) { + if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); } + else { s->print( "fp = " ); os::print_location(s, i[j]); } + } + s->cr(); + + for (j = 0; j < 8; ++j) { + s->print("l%d = ", j); os::print_location(s, l[j]); + } + s->cr(); + + for (j = 0; j < 8; ++j) { + if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); } + else { s->print( "sp = " ); os::print_location(s, o[j]); } + } + s->cr(); + + for (j = 0; j < 8; ++j) { + s->print("g%d = ", j); os::print_location(s, g[j]); + } + s->cr(); + + // print out floats with compression + for (j = 0; j < 32; ) { + jfloat val = f[j]; + int last = j; + for ( ; last+1 < 32; ++last ) { + char b1[1024], b2[1024]; + sprintf(b1, "%f", val); + sprintf(b2, "%f", f[last+1]); + if (strcmp(b1, b2)) + break; + } + s->print("f%d", j); + if ( j != last ) s->print(" - f%d", last); + s->print(" = %f", val); + s->fill_to(25); + 
s->print_cr(" (0x%x)", *(int*)&val); + j = last + 1; + } + s->cr(); + + // and doubles (evens only) + for (j = 0; j < 32; ) { + jdouble val = d[j]; + int last = j; + for ( ; last+1 < 32; ++last ) { + char b1[1024], b2[1024]; + sprintf(b1, "%f", val); + sprintf(b2, "%f", d[last+1]); + if (strcmp(b1, b2)) + break; + } + s->print("d%d", 2 * j); + if ( j != last ) s->print(" - d%d", last); + s->print(" = %f", val); + s->fill_to(30); + s->print("(0x%x)", *(int*)&val); + s->fill_to(42); + s->print_cr("(0x%x)", *(1 + (int*)&val)); + j = last + 1; + } + s->cr(); +} + +void RegistersForDebugging::save_registers(MacroAssembler* a) { + a->sub(FP, align_up(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0); + a->flushw(); + int i; + for (i = 0; i < 8; ++i) { + a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, i_offset(i)); + a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, l_offset(i)); + a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i)); + a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i)); + } + for (i = 0; i < 32; ++i) { + a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i)); + } + for (i = 0; i < 64; i += 2) { + a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i)); + } +} + +void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) { + for (int i = 1; i < 8; ++i) { + a->ld_ptr(r, g_offset(i), as_gRegister(i)); + } + for (int j = 0; j < 32; ++j) { + a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j)); + } + for (int k = 0; k < 64; k += 2) { + a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k)); + } +} + +void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) { + // plausibility check for oops + if (!VerifyOops) return; + + if (reg == G0) return; // always NULL, which is always an oop + + BLOCK_COMMENT("verify_oop {"); + char buffer[64]; +#ifdef 
COMPILER1 + if (CommentedAssembly) { + snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); + block_comment(buffer); + } +#endif + + const char* real_msg = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("%s at offset %d (%s:%d)", msg, offset(), file, line); + real_msg = code_string(ss.as_string()); + } + + // Call indirectly to solve generation ordering problem + AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address()); + + // Make some space on stack above the current register window. + // Enough to hold 8 64-bit registers. + add(SP,-8*8,SP); + + // Save some 64-bit registers; a normal 'save' chops the heads off + // of 64-bit longs in the 32-bit build. + stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8); + stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8); + mov(reg,O0); // Move arg into O0; arg might be in O7 which is about to be crushed + stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8); + + // Size of set() should stay the same + patchable_set((intptr_t)real_msg, O1); + // Load address to call to into O7 + load_ptr_contents(a, O7); + // Register call to verify_oop_subroutine + callr(O7, G0); + delayed()->nop(); + // recover frame size + add(SP, 8*8,SP); + BLOCK_COMMENT("} verify_oop"); +} + +void MacroAssembler::_verify_oop_addr(Address addr, const char* msg, const char * file, int line) { + // plausibility check for oops + if (!VerifyOops) return; + + const char* real_msg = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("%s at SP+%d (%s:%d)", msg, addr.disp(), file, line); + real_msg = code_string(ss.as_string()); + } + + // Call indirectly to solve generation ordering problem + AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address()); + + // Make some space on stack above the current register window. + // Enough to hold 8 64-bit registers. 
+ add(SP,-8*8,SP); + + // Save some 64-bit registers; a normal 'save' chops the heads off + // of 64-bit longs in the 32-bit build. + stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8); + stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8); + ld_ptr(addr.base(), addr.disp() + 8*8, O0); // Load arg into O0; arg might be in O7 which is about to be crushed + stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8); + + // Size of set() should stay the same + patchable_set((intptr_t)real_msg, O1); + // Load address to call to into O7 + load_ptr_contents(a, O7); + // Register call to verify_oop_subroutine + callr(O7, G0); + delayed()->nop(); + // recover frame size + add(SP, 8*8,SP); +} + +// side-door communication with signalHandler in os_solaris.cpp +address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL }; + +// This macro is expanded just once; it creates shared code. Contract: +// receives an oop in O0. Must restore O0 & O7 from TLS. Must not smash ANY +// registers, including flags. May not use a register 'save', as this blows +// the high bits of the O-regs if they contain Long values. Acts as a 'leaf' +// call. +void MacroAssembler::verify_oop_subroutine() { + // Leaf call; no frame. + Label succeed, fail, null_or_fail; + + // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home). + // O0 is now the oop to be checked. O7 is the return address. + Register O0_obj = O0; + + // Save some more registers for temps. 
+ stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8); + stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8); + stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8); + stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8); + + // Save flags + Register O5_save_flags = O5; + rdccr( O5_save_flags ); + + { // count number of verifies + Register O2_adr = O2; + Register O3_accum = O3; + inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum); + } + + Register O2_mask = O2; + Register O3_bits = O3; + Register O4_temp = O4; + + // mark lower end of faulting range + assert(_verify_oop_implicit_branch[0] == NULL, "set once"); + _verify_oop_implicit_branch[0] = pc(); + + // We can't check the mark oop because it could be in the process of + // locking or unlocking while this is running. + set(Universe::verify_oop_mask (), O2_mask); + set(Universe::verify_oop_bits (), O3_bits); + + // assert((obj & oop_mask) == oop_bits); + and3(O0_obj, O2_mask, O4_temp); + cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail); + + if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { + // the null_or_fail case is useless; must test for null separately + br_null_short(O0_obj, pn, succeed); + } + + // Check the Klass* of this object for being in the right area of memory. 
+ // Cannot do the load in the delay above slot in case O0 is null + load_klass(O0_obj, O0_obj); + // assert((klass != NULL) + br_null_short(O0_obj, pn, fail); + + wrccr( O5_save_flags ); // Restore CCR's + + // mark upper end of faulting range + _verify_oop_implicit_branch[1] = pc(); + + //----------------------- + // all tests pass + bind(succeed); + + // Restore prior 64-bit registers + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+5*8,O5); + + retl(); // Leaf return; restore prior O7 in delay slot + delayed()->ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+7*8,O7); + + //----------------------- + bind(null_or_fail); // nulls are less common but OK + br_null(O0_obj, false, pt, succeed); + delayed()->wrccr( O5_save_flags ); // Restore CCR's + + //----------------------- + // report failure: + bind(fail); + _verify_oop_implicit_branch[2] = pc(); + + wrccr( O5_save_flags ); // Restore CCR's + + save_frame(align_up(sizeof(RegistersForDebugging) / BytesPerWord, 2)); + + // stop_subroutine expects message pointer in I1. 
+ mov(I1, O1); + + // Restore prior 64-bit registers + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+0*8,I0); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+1*8,I1); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+2*8,I2); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+3*8,I3); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+4*8,I4); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+5*8,I5); + + // factor long stop-sequence into subroutine to save space + assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet"); + + // call indirectly to solve generation ordering problem + AddressLiteral al(StubRoutines::Sparc::stop_subroutine_entry_address()); + load_ptr_contents(al, O5); + jmpl(O5, 0, O7); + delayed()->nop(); +} + + +void MacroAssembler::stop(const char* msg) { + // save frame first to get O7 for return address + // add one word to size in case struct is odd number of words long + // It must be doubleword-aligned for storing doubles into it. + + save_frame(align_up(sizeof(RegistersForDebugging) / BytesPerWord, 2)); + + // stop_subroutine expects message pointer in I1. + // Size of set() should stay the same + patchable_set((intptr_t)msg, O1); + + // factor long stop-sequence into subroutine to save space + assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet"); + + // call indirectly to solve generation ordering problem + AddressLiteral a(StubRoutines::Sparc::stop_subroutine_entry_address()); + load_ptr_contents(a, O5); + jmpl(O5, 0, O7); + delayed()->nop(); + + breakpoint_trap(); // make stop actually stop rather than writing + // unnoticeable results in the output files. + + // restore(); done in callee to save space! 
+} + + +void MacroAssembler::warn(const char* msg) { + save_frame(align_up(sizeof(RegistersForDebugging) / BytesPerWord, 2)); + RegistersForDebugging::save_registers(this); + mov(O0, L0); + // Size of set() should stay the same + patchable_set((intptr_t)msg, O0); + call( CAST_FROM_FN_PTR(address, warning) ); + delayed()->nop(); +// ret(); +// delayed()->restore(); + RegistersForDebugging::restore_registers(this, L0); + restore(); +} + + +void MacroAssembler::untested(const char* what) { + // We must be able to turn interactive prompting off + // in order to run automated test scripts on the VM + // Use the flag ShowMessageBoxOnError + + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("untested: %s", what); + b = code_string(ss.as_string()); + } + if (ShowMessageBoxOnError) { STOP(b); } + else { warn(b); } +} + + +void MacroAssembler::unimplemented(const char* what) { + const char* buf = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("unimplemented: %s", what); + buf = code_string(ss.as_string()); + } + stop(buf); +} + + +void MacroAssembler::stop_subroutine() { + RegistersForDebugging::save_registers(this); + + // for the sake of the debugger, stick a PC on the current frame + // (this assumes that the caller has performed an extra "save") + mov(I7, L7); + add(O7, -7 * BytesPerInt, I7); + + save_frame(); // one more save to free up another O7 register + mov(I0, O1); // addr of reg save area + + // We expect pointer to message in I1. 
Caller must set it up in O1 + mov(I1, O0); // get msg + call (CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + + restore(); + + RegistersForDebugging::restore_registers(this, O0); + + save_frame(0); + call(CAST_FROM_FN_PTR(address,breakpoint)); + delayed()->nop(); + restore(); + + mov(L7, I7); + retl(); + delayed()->restore(); // see stop above +} + + +void MacroAssembler::debug(char* msg, RegistersForDebugging* regs) { + if ( ShowMessageBoxOnError ) { + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + if (os::message_box(msg, "Execution stopped, print registers?")) + regs->print(::tty); + } + BREAKPOINT; + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else { + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); + } + assert(false, "DEBUG MESSAGE: %s", msg); +} + + +void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) { + subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words? 
+ Label no_extras; + br( negative, true, pt, no_extras ); // if neg, clear reg + delayed()->set(0, Rresult); // annulled, so only if taken + bind( no_extras ); +} + + +void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) { + add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult); + bclr(1, Rresult); + sll(Rresult, LogBytesPerWord, Rresult); // Rresult has total frame bytes +} + + +void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) { + calc_frame_size(Rextra_words, Rresult); + neg(Rresult); + save(SP, Rresult, SP); +} + + +// --------------------------------------------------------- +Assembler::RCondition cond2rcond(Assembler::Condition c) { + switch (c) { + /*case zero: */ + case Assembler::equal: return Assembler::rc_z; + case Assembler::lessEqual: return Assembler::rc_lez; + case Assembler::less: return Assembler::rc_lz; + /*case notZero:*/ + case Assembler::notEqual: return Assembler::rc_nz; + case Assembler::greater: return Assembler::rc_gz; + case Assembler::greaterEqual: return Assembler::rc_gez; + } + ShouldNotReachHere(); + return Assembler::rc_z; +} + +// compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS +void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) { + tst(s1); + br (c, a, p, L); +} + +// Compares a pointer register with zero and branches on null. +// Does a test & branch on 32-bit systems and a register-branch on 64-bit. +void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) { + assert_not_delayed(); + bpr( rc_z, a, p, s1, L ); +} + +void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) { + assert_not_delayed(); + bpr( rc_nz, a, p, s1, L ); +} + +// Compare registers and branch with nop in delay slot or cbcond without delay slot. + +// Compare integer (32 bit) values (icc only). 
+void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (!use_cbcond(L)) {                      // use_cbcond(L) declined: cmp + br + nop
+    cmp(s1, s2);
+    br(c, false, p, L);
+    delayed()->nop();
+    return;
+  }
+  Assembler::cbcond(c, icc, s1, s2, L);      // fused compare-and-branch, no delay slot
+}
+
+// Immediate form of the 32-bit (icc) compare; cbcond only if is_simm(simm13a, 5) holds.
+void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (!is_simm(simm13a, 5) || !use_cbcond(L)) {   // same test order as the cbcond check
+    cmp(s1, simm13a);
+    br(c, false, p, L);
+    delayed()->nop();
+    return;
+  }
+  Assembler::cbcond(c, icc, s1, simm13a, L);  // fused compare-and-branch, no delay slot
+}
+
+// Register form that branches on ptr_cc (tests xcc in LP64 and icc in !LP64).
+void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (!use_cbcond(L)) {                      // use_cbcond(L) declined: cmp + brx + nop
+    cmp(s1, s2);
+    brx(c, false, p, L);
+    delayed()->nop();
+    return;
+  }
+  Assembler::cbcond(c, ptr_cc, s1, s2, L);   // fused compare-and-branch, no delay slot
+}
+
+// Immediate form that branches on ptr_cc (tests xcc in LP64 and icc in !LP64).
+void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (!is_simm(simm13a, 5) || !use_cbcond(L)) {   // same test order as the cbcond check
+    cmp(s1, simm13a);
+    brx(c, false, p, L);
+    delayed()->nop();
+    return;
+  }
+  Assembler::cbcond(c, ptr_cc, s1, simm13a, L);  // fused compare-and-branch, no delay slot
+}
+
+// Short branch versions that compare a pointer with zero.
+
+void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {  // branch to L when s1 is zero
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(zero, ptr_cc, s1, 0, L);  // compare s1 with immediate 0 using ptr_cc
+  } else {
+    br_null(s1, false, p, L);
+    delayed()->nop();  // delay slot is filled with a nop
+  }
+}
+
+void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {  // branch to L when s1 is non-zero
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
+  } else {
+    br_notnull(s1, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Unconditional short branch
+void MacroAssembler::ba_short(Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(equal, icc, G0, G0, L);  // G0 compared with itself under 'equal', so the branch is always taken
+  } else {
+    br(always, false, pt, L);
+    delayed()->nop();
+  }
+}
+
+// Branch if 'icc' says zero or not (i.e. icc.z == 1|0).
+
+void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {  // iszero selects the zero / notZero condition
+  assert_not_delayed();
+  Condition cf = (iszero ? Assembler::zero : Assembler::notZero);
+  br(cf, false, p, L);  // relies on icc already set by the caller; no compare is emitted here
+  delayed()->nop();
+}
+
+// instruction sequences factored across compiler & interpreter
+
+
+void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,  // three-way compare of two hi:low register pairs
+                           Register Rb_hi, Register Rb_low,
+                           Register Rresult) {  // Rresult receives -1, 0 or 1
+
+  Label check_low_parts, done;
+
+  cmp(Ra_hi, Rb_hi ); // compare hi parts
+  br(equal, true, pt, check_low_parts);  // second argument 'true' is the annul flag position used elsewhere in this file as 'false'
+  delayed()->cmp(Ra_low, Rb_low); // test low parts
+
+  // And, with an unsigned comparison, it does not matter if the numbers
+  // are negative or not.
+  // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
+  // The second one is bigger (unsignedly).
+
+  // Other notes: The first move in each triplet can be unconditional
+  // (and therefore probably prefetchable).
+  // And the equals case for the high part does not need testing,
+  // since that triplet is reached only after finding the high halves differ.
+
+  mov(-1, Rresult);  // high halves differ: default to -1 ...
+  ba(done);
+  delayed()->movcc(greater, false, icc, 1, Rresult);  // ... overridden with 1 when the (signed) hi compare said greater
+
+  bind(check_low_parts);
+
+  mov( -1, Rresult);  // high halves equal: decide on the low halves, compared unsigned
+  movcc(equal, false, icc, 0, Rresult);
+  movcc(greaterUnsigned, false, icc, 1, Rresult);
+
+  bind(done);
+}
+
+void MacroAssembler::lneg( Register Rhi, Register Rlow ) {  // negates the Rhi:Rlow pair in place
+  subcc( G0, Rlow, Rlow );  // 0 - low, setting the condition codes
+  subc( G0, Rhi, Rhi );  // 0 - high, with the carry from the line above
+}
+
+void MacroAssembler::lshl( Register Rin_high, Register Rin_low,  // 64-bit left shift of a hi:low pair built from 32-bit shifts
+                           Register Rcount,  // shift count; masked to its low 6 bits in place below
+                           Register Rout_high, Register Rout_low,
+                           Register Rtemp ) {  // scratch; must satisfy the alias assert below
+
+
+  Register Ralt_count = Rtemp;  // both names alias the single scratch register
+  Register Rxfer_bits = Rtemp;
+
+  assert( Ralt_count != Rin_high
+       && Ralt_count != Rin_low
+       && Ralt_count != Rcount
+       && Rxfer_bits != Rin_low
+       && Rxfer_bits != Rin_high
+       && Rxfer_bits != Rcount
+       && Rxfer_bits != Rout_low
+       && Rout_low != Rin_high,
+       "register alias checks");
+
+  Label big_shift, done;
+
+  // This code can be optimized to use the 64 bit shifts in V9.
+  // Here we use the 32 bit shifts.
+
+  and3( Rcount, 0x3f, Rcount); // take least significant 6 bits
+  subcc(Rcount, 31, Ralt_count);
+  br(greater, true, pn, big_shift);
+  delayed()->dec(Ralt_count);  // Rcount-32 for the big_shift path (see the comment at big_shift)
+
+  // shift < 32 bits, Ralt_count = Rcount-31
+
+  // We get the transfer bits by shifting right by 32-count the low
+  // register. This is done by shifting right by 31-count and then by one
+  // more to take care of the special (rare) case where count is zero
+  // (shifting by 32 would not work).
+
+  neg(Ralt_count);  // now 31-count
+
+  // The order of the next two instructions is critical in the case where
+  // Rin and Rout are the same and should not be reversed.
+
+  srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
+  if (Rcount != Rout_low) {  // when Rcount is also Rout_low, the low half is written later so the count survives
+    sll(Rin_low, Rcount, Rout_low); // low half
+  }
+  sll(Rin_high, Rcount, Rout_high);
+  if (Rcount == Rout_low) {
+    sll(Rin_low, Rcount, Rout_low); // low half
+  }
+  srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
+  ba(done);
+  delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low
+
+  // shift >= 32 bits, Ralt_count = Rcount-32
+  bind(big_shift);
+  sll(Rin_low, Ralt_count, Rout_high );
+  clr(Rout_low);
+
+  bind(done);
+}
+
+
+void MacroAssembler::lshr( Register Rin_high, Register Rin_low,  // 64-bit arithmetic right shift of a hi:low pair (sra on the high half)
+                           Register Rcount,  // shift count; masked to its low 6 bits in place below
+                           Register Rout_high, Register Rout_low,
+                           Register Rtemp ) {  // scratch; must satisfy the alias assert below
+
+  Register Ralt_count = Rtemp;  // both names alias the single scratch register
+  Register Rxfer_bits = Rtemp;
+
+  assert( Ralt_count != Rin_high
+       && Ralt_count != Rin_low
+       && Ralt_count != Rcount
+       && Rxfer_bits != Rin_low
+       && Rxfer_bits != Rin_high
+       && Rxfer_bits != Rcount
+       && Rxfer_bits != Rout_high
+       && Rout_high != Rin_low,
+       "register alias checks");
+
+  Label big_shift, done;
+
+  // This code can be optimized to use the 64 bit shifts in V9.
+  // Here we use the 32 bit shifts.
+
+  and3( Rcount, 0x3f, Rcount); // take least significant 6 bits
+  subcc(Rcount, 31, Ralt_count);
+  br(greater, true, pn, big_shift);
+  delayed()->dec(Ralt_count);  // Rcount-32 for the big_shift path (see the comment at big_shift)
+
+  // shift < 32 bits, Ralt_count = Rcount-31
+
+  // We get the transfer bits by shifting left by 32-count the high
+  // register. This is done by shifting left by 31-count and then by one
+  // more to take care of the special (rare) case where count is zero
+  // (shifting by 32 would not work).
+
+  neg(Ralt_count);  // now 31-count
+  if (Rcount != Rout_low) {  // when Rcount is also Rout_low, the low half is written later so the count survives
+    srl(Rin_low, Rcount, Rout_low);
+  }
+
+  // The order of the next two instructions is critical in the case where
+  // Rin and Rout are the same and should not be reversed.
+
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  sra(Rin_high, Rcount, Rout_high ); // high half
+  sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
+  if (Rcount == Rout_low) {
+    srl(Rin_low, Rcount, Rout_low);
+  }
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
+
+  // shift >= 32 bits, Ralt_count = Rcount-32
+  bind(big_shift);
+
+  sra(Rin_high, Ralt_count, Rout_low);
+  sra(Rin_high, 31, Rout_high); // sign into hi
+
+  bind( done );
+}
+
+
+
+void MacroAssembler::lushr( Register Rin_high, Register Rin_low,  // 64-bit logical right shift of a hi:low pair (srl on the high half)
+                            Register Rcount,  // shift count; masked to its low 6 bits in place below
+                            Register Rout_high, Register Rout_low,
+                            Register Rtemp ) {  // scratch; must satisfy the alias assert below
+
+  Register Ralt_count = Rtemp;  // both names alias the single scratch register
+  Register Rxfer_bits = Rtemp;
+
+  assert( Ralt_count != Rin_high
+       && Ralt_count != Rin_low
+       && Ralt_count != Rcount
+       && Rxfer_bits != Rin_low
+       && Rxfer_bits != Rin_high
+       && Rxfer_bits != Rcount
+       && Rxfer_bits != Rout_high
+       && Rout_high != Rin_low,
+       "register alias checks");
+
+  Label big_shift, done;
+
+  // This code can be optimized to use the 64 bit shifts in V9.
+  // Here we use the 32 bit shifts.
+
+  and3( Rcount, 0x3f, Rcount); // take least significant 6 bits
+  subcc(Rcount, 31, Ralt_count);
+  br(greater, true, pn, big_shift);
+  delayed()->dec(Ralt_count);  // Rcount-32 for the big_shift path (see the comment at big_shift)
+
+  // shift < 32 bits, Ralt_count = Rcount-31
+
+  // We get the transfer bits by shifting left by 32-count the high
+  // register. This is done by shifting left by 31-count and then by one
+  // more to take care of the special (rare) case where count is zero
+  // (shifting by 32 would not work).
+
+  neg(Ralt_count);  // now 31-count
+  if (Rcount != Rout_low) {  // when Rcount is also Rout_low, the low half is written later so the count survives
+    srl(Rin_low, Rcount, Rout_low);
+  }
+
+  // The order of the next two instructions is critical in the case where
+  // Rin and Rout are the same and should not be reversed.
+
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  srl(Rin_high, Rcount, Rout_high ); // high half
+  sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more
+  if (Rcount == Rout_low) {
+    srl(Rin_low, Rcount, Rout_low);
+  }
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
+
+  // shift >= 32 bits, Ralt_count = Rcount-32
+  bind(big_shift);
+
+  srl(Rin_high, Ralt_count, Rout_low);
+  clr(Rout_high);  // logical shift: the high half becomes zero
+
+  bind( done );
+}
+
+void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {  // single-register three-way compare; Rresult = -1, 0 or 1
+  cmp(Ra, Rb);
+  mov(-1, Rresult);  // default, overridden by the conditional moves on xcc below
+  movcc(equal, false, xcc, 0, Rresult);
+  movcc(greater, false, xcc, 1, Rresult);
+}
+
+
+void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) {  // load of 1, 2, 4 or 8 bytes
+  switch (size_in_bytes) {
+  case 8: ld_long(src, dst); break;
+  case 4: ld( src, dst); break;  // is_signed is not consulted for the 4- and 8-byte cases
+  case 2: is_signed ? ldsh(src, dst) : lduh(src, dst); break;
+  case 1: is_signed ? ldsb(src, dst) : ldub(src, dst); break;
+  default: ShouldNotReachHere();  // any other size is a caller error
+  }
+}
+
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {  // store of 1, 2, 4 or 8 bytes
+  switch (size_in_bytes) {
+  case 8: st_long(src, dst); break;
+  case 4: st( src, dst); break;
+  case 2: sth( src, dst); break;
+  case 1: stb( src, dst); break;
+  default: ShouldNotReachHere();  // any other size is a caller error
+  }
+}
+
+
+void MacroAssembler::float_cmp( bool is_float, int unordered_result,  // is_float picks single (S) vs double (D) precision compare
+                                FloatRegister Fa, FloatRegister Fb,
+                                Register Rresult) {  // Rresult receives -1, 0 or 1
+  if (is_float) {
+    fcmp(FloatRegisterImpl::S, fcc0, Fa, Fb);
+  } else {
+    fcmp(FloatRegisterImpl::D, fcc0, Fa, Fb);
+  }
+
+  if (unordered_result == 1) {  // unordered operands are folded into the "greater" outcome
+    mov( -1, Rresult);
+    movcc(f_equal, true, fcc0, 0, Rresult);
+    movcc(f_unorderedOrGreater, true, fcc0, 1, Rresult);
+  } else {  // any other unordered_result: unordered keeps the default -1
+    mov( -1, Rresult);
+    movcc(f_equal, true, fcc0, 0, Rresult);
+    movcc(f_greater, true, fcc0, 1, Rresult);
+  }
+}
+
+
+void MacroAssembler::save_all_globals_into_locals() {  // copies G1..G7 into L1..L7 (G0 is not copied)
+  mov(G1,L1);
+  mov(G2,L2);
+  mov(G3,L3);
+  mov(G4,L4);
+  mov(G5,L5);
+  mov(G6,L6);
+  mov(G7,L7);
+}
+
+void MacroAssembler::restore_globals_from_locals() {  // inverse of save_all_globals_into_locals: L1..L7 back into G1..G7
+  mov(L1,G1);
+  mov(L2,G2);
+  mov(L3,G3);
+  mov(L4,G4);
+  mov(L5,G5);
+  mov(L6,G6);
+  mov(L7,G7);
+}
+
+RegisterOrConstant MacroAssembler::regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {  // s1 & ~s2, folded at assembly time when both are constants
+  assert(d.register_or_noreg() != G0, "lost side effect");
+  if ((s2.is_constant() && s2.as_constant() == 0) ||
+      (s2.is_register() && s2.as_register() == G0)) {
+    // Do nothing, just move value.
+    if (s1.is_register()) {
+      if (d.is_constant()) d = temp;  // no destination register supplied: the result goes to temp
+      mov(s1.as_register(), d.as_register());
+      return d;
+    } else {
+      return s1;  // constant in, constant out; no code emitted
+    }
+  }
+
+  if (s1.is_register()) {
+    assert_different_registers(s1.as_register(), temp);
+    if (d.is_constant()) d = temp;
+    andn(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
+    return d;
+  } else {
+    if (s2.is_register()) {
+      assert_different_registers(s2.as_register(), temp);
+      if (d.is_constant()) d = temp;
+      set(s1.as_constant(), temp);  // materialize the constant left operand in temp first
+      andn(temp, s2.as_register(), d.as_register());
+      return d;
+    } else {
+      intptr_t res = s1.as_constant() & ~s2.as_constant();  // both constant: computed here, no code emitted
+      return res;
+    }
+  }
+}
+
+RegisterOrConstant MacroAssembler::regcon_inc_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {  // s1 + s2, folded at assembly time when both are constants
+  assert(d.register_or_noreg() != G0, "lost side effect");
+  if ((s2.is_constant() && s2.as_constant() == 0) ||
+      (s2.is_register() && s2.as_register() == G0)) {
+    // Do nothing, just move value.
+    if (s1.is_register()) {
+      if (d.is_constant()) d = temp;  // no destination register supplied: the result goes to temp
+      mov(s1.as_register(), d.as_register());
+      return d;
+    } else {
+      return s1;  // constant in, constant out; no code emitted
+    }
+  }
+
+  if (s1.is_register()) {
+    assert_different_registers(s1.as_register(), temp);
+    if (d.is_constant()) d = temp;
+    add(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
+    return d;
+  } else {
+    if (s2.is_register()) {
+      assert_different_registers(s2.as_register(), temp);
+      if (d.is_constant()) d = temp;
+      add(s2.as_register(), ensure_simm13_or_reg(s1, temp), d.as_register());  // operands swapped so the register comes first
+      return d;
+    } else {
+      intptr_t res = s1.as_constant() + s2.as_constant();  // both constant: computed here, no code emitted
+      return res;
+    }
+  }
+}
+
+RegisterOrConstant MacroAssembler::regcon_sll_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {  // s1 << s2, folded at assembly time when both are constants
+  assert(d.register_or_noreg() != G0, "lost side effect");
+  if (!is_simm13(s2.constant_or_zero()))
+    s2 = (s2.as_constant() & 0xFF);  // a constant count that fails is_simm13 is reduced to its low 8 bits
+  if ((s2.is_constant() && s2.as_constant() == 0) ||
+      (s2.is_register() && s2.as_register() == G0)) {
+    // Do nothing, just move value.
+    if (s1.is_register()) {
+      if (d.is_constant()) d = temp;  // no destination register supplied: the result goes to temp
+      mov(s1.as_register(), d.as_register());
+      return d;
+    } else {
+      return s1;  // constant in, constant out; no code emitted
+    }
+  }
+
+  if (s1.is_register()) {
+    assert_different_registers(s1.as_register(), temp);
+    if (d.is_constant()) d = temp;
+    sll_ptr(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
+    return d;
+  } else {
+    if (s2.is_register()) {
+      assert_different_registers(s2.as_register(), temp);
+      if (d.is_constant()) d = temp;
+      set(s1.as_constant(), temp);  // materialize the constant left operand in temp first
+      sll_ptr(temp, s2.as_register(), d.as_register());
+      return d;
+    } else {
+      intptr_t res = s1.as_constant() << s2.as_constant();  // both constant: computed here, no code emitted
+      return res;
+    }
+  }
+}
+
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by the pair (intf_klass, itable_index).
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,  // may be noreg; see the save-frame path below
+                                             Register sethi_temp,  // may be noreg; see the save-frame path below
+                                             Label& L_no_such_interface,
+                                             bool return_method) {  // when false, only the interface scan is performed
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(!return_method || itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  Label L_no_such_interface_restore;
+  bool did_save = false;
+  if (scan_temp == noreg || sethi_temp == noreg) {  // a temp is missing: open a new frame and use L2/L3 as temps
+    Register recv_2 = recv_klass->is_global() ? recv_klass : L0;
+    Register intf_2 = intf_klass->is_global() ? intf_klass : L1;
+    assert(method_result->is_global(), "must be able to return value");
+    scan_temp = L2;
+    sethi_temp = L3;
+    save_frame_and_mov(0, recv_klass, recv_2, intf_klass, intf_2);
+    recv_klass = recv_2;
+    intf_klass = intf_2;
+    did_save = true;  // every exit path must now execute a restore()
+  }
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = in_bytes(Klass::vtable_start_offset());
+  int scan_step = itableOffsetEntry::size() * wordSize;
+  int vte_size = vtableEntry::size_in_bytes();  // NOTE(review): vte_size is not read again in this function
+
+  lduw(recv_klass, in_bytes(Klass::vtable_length_offset()), scan_temp);
+  // %%% We should store the aligned, prescaled offset in the klassoop.
+  // Then the next several instructions would fold away.
+
+  int itb_offset = vtable_base;
+  int itb_scale = exact_log2(vtableEntry::size_in_bytes());
+  sll(scan_temp, itb_scale, scan_temp);  // vtable length scaled to bytes
+  add(scan_temp, itb_offset, scan_temp);
+  add(recv_klass, scan_temp, scan_temp);  // scan_temp now addresses the first itableOffsetEntry
+
+  if (return_method) {
+    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+    RegisterOrConstant itable_offset = itable_index;
+    itable_offset = regcon_sll_ptr(itable_index, exact_log2(itableMethodEntry::size() * wordSize), itable_offset);
+    itable_offset = regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes(), itable_offset);
+    add(recv_klass, ensure_simm13_or_reg(itable_offset, sethi_temp), recv_klass);
+  }
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label L_search, L_found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {  // the scan body is emitted twice: a peeled first probe (peel == 1), then the loop proper
+    // %%%% Could load both offset and interface in one ldx, if they were
+    // in the opposite order. This would save a load.
+    ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result);
+
+    // Check that this entry is non-null. A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    bpr(Assembler::rc_z, false, Assembler::pn, method_result, did_save ? L_no_such_interface_restore : L_no_such_interface);
+    delayed()->cmp(method_result, intf_klass);
+
+    if (peel) {
+      brx(Assembler::equal, false, Assembler::pt, L_found_method);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, L_search);
+      // (invert the test to fall through to found_method...)
+    }
+    delayed()->add(scan_temp, scan_step, scan_temp);  // advance to the next entry in the delay slot, on both paths
+
+    if (!peel) break;
+
+    bind(L_search);
+  }
+
+  bind(L_found_method);
+
+  if (return_method) {
+    // Got a hit.
+    int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
+    // scan_temp[-scan_step] points to the vtable offset we need
+    ito_offset -= scan_step;  // compensates for the add in the delay slot above
+    lduw(scan_temp, ito_offset, scan_temp);
+    ld_ptr(recv_klass, scan_temp, method_result);
+  }
+
+  if (did_save) {
+    Label L_done;
+    ba(L_done);
+    delayed()->restore();  // success path: pop the frame opened above
+
+    bind(L_no_such_interface_restore);
+    ba(L_no_such_interface);
+    delayed()->restore();  // failure path: pop the frame before leaving for the caller's label
+
+    bind(L_done);
+  }
+}
+
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
+  Register sethi_temp = method_result;  // the result register doubles as the scratch register
+  const int base = in_bytes(Klass::vtable_start_offset()) +
+                   // method pointer offset within the vtable entry:
+                   vtableEntry::method_offset_in_bytes();
+  RegisterOrConstant vtable_offset = vtable_index;
+  // Each of the following three lines potentially generates an instruction.
+  // But the total number of address formation instructions will always be
+  // at most two, and will often be zero. In any case, it will be optimal.
+  // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x).
+  // If vtable_index is a constant, we will have at most (set B+X<is_global()) sub_2 = L0;  // NOTE(review): this line looks truncated by extraction - the rest of lookup_virtual_method and the head of the next function (apparently check_klass_subtype, judging by the code below) are missing; restore from the upstream source
+  if (!sup_2->is_global()) sup_2 = L1;
+  bool did_save = false;
+  if (temp_reg == noreg || temp2_reg == noreg) {  // a temp is missing: open a new frame and use L2/L3 as temps
+    temp_reg = L2;
+    temp2_reg = L3;
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+    sub_klass = sub_2;
+    super_klass = sup_2;
+    did_save = true;
+  }
+  Label L_failure, L_pop_to_failure, L_pop_to_success;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                (did_save ? &L_pop_to_success : &L_success),
+                                (did_save ? &L_pop_to_failure : &L_failure), NULL);
+
+  if (!did_save)
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);  // the slow path below always runs inside a saved frame
+  check_klass_subtype_slow_path(sub_2, sup_2,
+                                L2, L3, L4, L5,
+                                NULL, &L_pop_to_failure);  // NULL success label: success falls through
+
+  // on success:
+  bind(L_pop_to_success);
+  restore();
+  ba_short(L_success);
+
+  // on failure:
+  bind(L_pop_to_failure);
+  restore();
+  bind(L_failure);  // failure falls out of the end of this sequence
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,  // needed only when the super check offset must be loaded
+                                                   Label* L_success,  // NULL means fall through
+                                                   Label* L_failure,  // NULL means fall through
+                                                   Label* L_slow_path,  // NULL means fall through
+                                                   RegisterOrConstant super_check_offset) {  // constant -1 means "load it from super_klass"
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+
+  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco ||
+                         super_check_offset.constant_or_zero() == sco_offset);  // NOTE(review): the dispatch further down treats sc_offset (not sco_offset) as the constant that needs a slow path - confirm which is intended; this value only feeds the assert below
+
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass, temp_reg,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp2_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface. Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(super_klass, sub_klass);
+  brx(Assembler::equal, false, Assembler::pn, *L_success);
+  delayed()->nop();
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is always positive...
+    lduw(super_klass, sco_offset, temp2_reg);
+    super_check_offset = RegisterOrConstant(temp2_reg);
+    // super_check_offset is register.
+    assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
+  }
+  ld_ptr(sub_klass, super_check_offset, temp_reg);
+  cmp(super_klass, temp_reg);  // the branches below consume this compare
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label) \
+  if (&(label) != &L_fallthrough) { \
+    ba(label); delayed()->nop(); \
+  }
+
+  if (super_check_offset.is_register()) {  // offset known only at run time: test it against sc_offset in generated code
+    brx(Assembler::equal, false, Assembler::pn, *L_success);
+    delayed()->cmp(super_check_offset.as_register(), sc_offset);  // sets up the flags for the next branch
+
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
+      delayed()->nop();
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
+      delayed()->nop();
+      FINAL_JUMP(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef FINAL_JUMP
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register count_temp,  // holds the remaining element count during the scan
+                                                   Register scan_temp,  // holds the scan pointer into the secondary-super array
+                                                   Register scratch_reg,  // receives each loaded array element
+                                                   Register coop_reg,  // NOTE(review): only referenced by the alias assert in this function
+                                                   Label* L_success,  // NULL means fall through
+                                                   Label* L_failure) {  // NULL means fall through
+  assert_different_registers(sub_klass, super_klass,
+                             count_temp, scan_temp, scratch_reg, coop_reg);
+
+  Label L_fallthrough, L_loop;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  inc_counter((address) pst_counter, count_temp, scan_temp);  // the temps are free here; they are loaded for real below
+#endif
+
+  // We will consult the secondary-super array.
+  ld_ptr(sub_klass, ss_offset, scan_temp);
+
+  Register search_key = super_klass;  // alias only; no instruction is emitted
+
+  // Load the array length. (Positive movl does right thing on LP64.)
+  lduw(scan_temp, Array::length_offset_in_bytes(), count_temp);  // NOTE(review): 'Array::' appears to have lost a template argument list in extraction - confirm against upstream
+
+  // Check for empty secondary super list
+  tst(count_temp);
+
+  // In the array of super classes elements are pointer sized.
+  int element_size = wordSize;
+
+  // Top of search loop
+  bind(L_loop);
+  br(Assembler::equal, false, Assembler::pn, *L_failure);  // flags come from tst() on entry and from deccc() on later iterations
+  delayed()->add(scan_temp, element_size, scan_temp);
+
+  // Skip the array header in all array accesses.
+  int elem_offset = Array::base_offset_in_bytes();  // NOTE(review): same apparent template-argument loss as on the length load above
+  elem_offset -= element_size; // the scan pointer was pre-incremented also
+
+  // Load next super to check
+  ld_ptr( scan_temp, elem_offset, scratch_reg );
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  cmp(scratch_reg, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
+  delayed()->deccc(count_temp); // decrement trip counter in delay slot
+
+  // Success. Cache the super we found and proceed in triumph.
+  st_ptr(super_klass, sub_klass, sc_offset);
+
+  if (L_success != &L_fallthrough) {
+    ba(*L_success);
+    delayed()->nop();
+  }
+
+  bind(L_fallthrough);
+}
+
+
+RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,  // slot index, constant or in a register
+                                                   Register temp_reg,  // receives the computed offset when arg_slot is a register
+                                                   int extra_slot_offset) {  // additional slots added to arg_slot
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = extra_slot_offset * stackElementSize;
+  if (arg_slot.is_constant()) {
+    offset += arg_slot.as_constant() * stackElementSize;
+    return offset;  // fully constant: no code emitted
+  } else {
+    assert(temp_reg != noreg, "must specify");
+    sll_ptr(arg_slot.as_register(), exact_log2(stackElementSize), temp_reg);  // slot index scaled to bytes
+    if (offset != 0)
+      add(temp_reg, offset, temp_reg);
+    return temp_reg;
+  }
+}
+
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,  // builds a Gargs-relative Address for the given slot
+                                         Register temp_reg,
+                                         int extra_slot_offset) {
+  return Address(Gargs, argument_offset(arg_slot, temp_reg, extra_slot_offset));
+}
+
+
+void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,  // mark_reg: the object's mark word, per its use below
+                                          Register temp_reg,
+                                          Label& done, Label* slow_case,  // slow_case may be NULL (checked before each use)
+                                          BiasedLockingCounters* counters) {  // may be NULL; defaulted below when statistics are on
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  if (PrintBiasedLockingStatistics) {
+    assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
+    if (counters == NULL)
+      counters = BiasedLocking::counters();
+  }
+
+  Label cas_label;
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  and3(mark_reg, markWord::biased_lock_mask_in_place, temp_reg);
+  cmp_and_brx_short(temp_reg, markWord::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);  // not the biased pattern: skip to the end of this sequence
+
+  load_klass(obj_reg, temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
+  or3(G2_thread, temp_reg, temp_reg);  // prototype header combined with the current thread
+  xor3(mark_reg, temp_reg, temp_reg);  // differences between that and the actual mark
+  andcc(temp_reg, ~((int) markWord::age_mask_in_place), temp_reg);  // ignore the age bits; sets the flags tested below
+  if (counters != NULL) {
+    cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
+    // Reload mark_reg as we may need it later
+    ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
+  }
+  brx(Assembler::equal, true, Assembler::pt, done);
+  delayed()->nop();
+
+  Label try_revoke_bias;
+  Label try_rebias;
+  Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
+  assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  btst(markWord::biased_lock_mask_in_place, temp_reg);
+  brx(Assembler::notZero, false, Assembler::pn, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  delayed()->btst(markWord::epoch_mask_in_place, temp_reg);  // sits in the delay slot of the try_revoke_bias branch above
+  brx(Assembler::notZero, false, Assembler::pn, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  delayed()->and3(mark_reg,
+                  markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place,
+                  mark_reg);  // sits in the delay slot of the try_rebias branch above
+  or3(G2_thread, mark_reg, temp_reg);
+  cas_ptr(mark_addr.base(), mark_reg, temp_reg);
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  cmp(mark_reg, temp_reg);
+  if (counters != NULL) {
+    cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
+  }
+  if (slow_case != NULL) {
+    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
+    delayed()->nop();
+  }
+  ba_short(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_klass(obj_reg, temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
+  or3(G2_thread, temp_reg, temp_reg);
+  cas_ptr(mark_addr.base(), mark_reg, temp_reg);
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  cmp(mark_reg, temp_reg);
+  if (counters != NULL) {
+    cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
+  }
+  if (slow_case != NULL) {
+    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
+    delayed()->nop();
+  }
+  ba_short(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_klass(obj_reg, temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
+  cas_ptr(mark_addr.base(), mark_reg, temp_reg);
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cmp(mark_reg, temp_reg);  // only needed to drive the statistics counter
+    cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg);
+  }
+
+  bind(cas_label);  // the caller's regular locking code is expected to follow here
+}
+
+void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done,
+                                          bool allow_delay_slot_filling) {  // true: the branch is annulled and no nop is emitted here
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  ld_ptr(mark_addr, temp_reg);
+  and3(temp_reg, markWord::biased_lock_mask_in_place, temp_reg);
+  cmp(temp_reg, markWord::biased_lock_pattern);
+  brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
+  delayed();  // NOTE(review): with allow_delay_slot_filling the slot is left open - presumably the caller's next instruction fills it; confirm
+  if (!allow_delay_slot_filling) {
+    nop();
+  }
+}
+
+
+// compiler_lock_object() and compiler_unlock_object() are direct transliterations
+// of i486.ad fast_lock() and fast_unlock(). See those methods for detailed comments.
+// The code could be tightened up considerably.
+//
+// box->dhw disposition - post-conditions at DONE_LABEL.
+// - Successful inflated lock: box->dhw != 0.
+//   Any non-zero value suffices.
+//   Consider G2_thread, rsp, boxReg, or markWord::unused_mark()
+// - Successful Stack-lock: box->dhw == mark.
+//   box->dhw must contain the displaced mark word value
+// - Failure -- icc.ZFlag == 0 and box->dhw is undefined.
+//   The slow-path enter() is responsible for setting
+//   box->dhw = NonZero (typically markWord::unused_mark()).
+// - Biased: box->dhw is undefined
+//
+// SPARC refworkload performance - specifically jetstream and scimark - are
+// extremely sensitive to the size of the code emitted by compiler_lock_object
+// and compiler_unlock_object. Critically, the key factor is code size, not path
+// length. (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
+// effect).


// Emits the inline fast path for locking the object in Roop, using the
// on-stack lock record addressed by Rbox.
//
//   Roop      - object being locked (verified with verify_oop)
//   Rmark     - temp; receives the fetched mark word
//   Rbox      - address of the BasicLock whose displaced header is written
//   Rscratch  - temp
//   counters  - if non-NULL, lock statistics counters are incremented
//   try_bias  - if true, the biased-locking entry sequence is emitted first
//
// All paths end at the local label 'done' with the integer condition codes
// left by the last compare/andcc on that path.
void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
                                          Register Rbox, Register Rscratch,
                                          BiasedLockingCounters* counters,
                                          bool try_bias) {
  Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());

  verify_oop(Roop);
  Label done ;

  if (counters != NULL) {
    inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
  }

  // Aggressively avoid the Store-before-CAS penalty
  // Defer the store into box->dhw until after the CAS
  Label IsInflated, Recursive ;

// Anticipate CAS -- Avoid RTS->RTO upgrade
// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);

  ld_ptr(mark_addr, Rmark);           // fetch obj->mark
  // Triage: biased, stack-locked, neutral, inflated

  if (try_bias) {
    biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
    // Invariant: if control reaches this point in the emitted stream
    // then Rmark has not been modified.
  }
  // Bit value 2 set in the fetched mark selects the IsInflated path.
  andcc(Rmark, 2, G0);
  brx(Assembler::notZero, false, Assembler::pn, IsInflated);
  delayed()->         // Beware - dangling delay-slot

  // Try stack-lock acquisition.
  // Transiently install BUSY (0) encoding in the mark word.
  // if the CAS of 0 into the mark was successful then we execute:
  //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
  //   ST obj->mark = box    -- overwrite transient 0 value
  // This presumes TSO, of course.

  // NOTE: the mov below is the instruction that fills the delay slot of the
  // brx above; keep it immediately after the dangling 'delayed()->'.
  mov(0, Rscratch);
  or3(Rmark, markWord::unlocked_value, Rmark);
  assert(mark_addr.disp() == 0, "cas must take a zero displacement");
  cas_ptr(mark_addr.base(), Rmark, Rscratch);
// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
  // After the CAS, Rscratch holds the value that was in obj->mark.
  cmp(Rscratch, Rmark);
  brx(Assembler::notZero, false, Assembler::pn, Recursive);
  delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
  if (counters != NULL) {
    cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
  }
  ba(done);
  delayed()->st_ptr(Rbox, mark_addr);

  bind(Recursive);
  // Stack-lock attempt failed - check for recursive stack-lock.
  // Tests show that we can remove the recursive case with no impact
  // on refworkload 0.83.  If we need to reduce the size of the code
  // emitted by compiler_lock_object() the recursive case is perfect
  // candidate.
  //
  // A more extreme idea is to always inflate on stack-lock recursion.
  // This lets us eliminate the recursive checks in compiler_lock_object
  // and compiler_unlock_object and the (box->dhw == 0) encoding.
  // A brief experiment - requiring changes to synchronizer.cpp and the
  // interpreter - showed a performance *increase*.  In the same experiment
  // I eliminated the fast-path stack-lock code from the interpreter and
  // always passed control to the "slow" operators in synchronizer.cpp.

  // RScratch contains the fetched obj->mark value from the failed CAS.
  // Compute (mark - STACK_BIAS - SP) and mask it; the masked result (and the
  // condition codes set by andcc) is what gets stored into box->dhw below.
  sub(Rscratch, STACK_BIAS, Rscratch);
  sub(Rscratch, SP, Rscratch);
  assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
  andcc(Rscratch, 0xfffff003, Rscratch);
  if (counters != NULL) {
    // Accounting needs the Rscratch register
    st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
    ba_short(done);
  } else {
    ba(done);
    delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
  }

  bind   (IsInflated);

  // Try to CAS m->owner from null to Self
  // Invariant: if we acquire the lock then _recursions should be 0.
  add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
  mov(G2_thread, Rscratch);
  cas_ptr(Rmark, G0, Rscratch);
  andcc(Rscratch, Rscratch, G0);             // set ICCs for done: icc.zf iff success
  // set icc.zf : 1=success 0=failure
  // ST box->displaced_header = NonZero.
  // Any non-zero value suffices:
  //    markWord::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
  st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
  // Intentional fall-through into done

  bind   (done);
}

// Emits the inline fast path for unlocking the object in Roop whose on-stack
// lock record is addressed by Rbox.
//
//   Roop      - object being unlocked
//   Rmark     - temp; receives obj->mark, later reused as the monitor field address
//   Rbox      - address of the BasicLock; also reused as a temp below
//   Rscratch  - temp
//   try_bias  - if true, the biased-locking exit check is emitted first
//
// All paths end at the local label 'done' with the integer condition codes
// left by the last compare on that path.
void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
                                            Register Rbox, Register Rscratch,
                                            bool try_bias) {
  Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());

  Label done ;

  // Beware ... If the aggregate size of the code emitted by CLO and CUO
  // is too large performance rolls abruptly off a cliff.
  // This could be related to inlining policies, code cache management, or
  // I$ effects.
  Label LStacked ;

  if (try_bias) {
    // TODO: eliminate redundant LDs of obj->mark
    biased_locking_exit(mark_addr, Rscratch, done);
  }

  ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
  ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
  // box->dhw == 0 goes straight to done; presumably this is the recursive
  // stack-lock case (the "box->dhw == 0" encoding mentioned in
  // compiler_lock_object) - confirm.
  andcc(Rscratch, Rscratch, G0);
  brx(Assembler::zero, false, Assembler::pn, done);
  delayed()->nop();      // consider: relocate fetch of mark, above, into this DS
  // Bit value 2 clear in the mark => stack-locked path (LStacked).
  andcc(Rmark, 2, G0);
  brx(Assembler::zero, false, Assembler::pt, LStacked);
  delayed()->nop();

  // It's inflated
  // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
  // the ST of 0 into _owner which releases the lock.  This prevents loads
  // and stores within the critical section from reordering (floating)
  // past the store that releases the lock.  But TSO is a strong memory model
  // and that particular flavor of barrier is a noop, so we can safely elide it.
  // Note that we use 1-0 locking by default for the inflated case.  We
  // close the resultant (and rare) race by having contended threads in
  // monitorenter periodically poll _owner.

  // 1-0 form : avoids CAS and MEMBAR in the common case
  // Do not bother to ratify that m->Owner == Self.
  ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
  orcc(Rbox, G0, G0);
  brx(Assembler::notZero, false, Assembler::pn, done);
  // The EntryList load below fills the delay slot of the brx above.
  delayed()->
  ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
  ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
  orcc(Rbox, Rscratch, G0);
  brx(Assembler::zero, false, Assembler::pt, done);
  // The store of 0 into owner below fills the delay slot of the brx above.
  delayed()->
  st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));

  membar(StoreLoad);
  // Check that _succ is (or remains) non-zero
  ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
  andcc(Rscratch, Rscratch, G0);
  brx(Assembler::notZero, false, Assembler::pt, done);
  delayed()->andcc(G0, G0, G0);
  add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
  mov(G2_thread, Rscratch);
  cas_ptr(Rmark, G0, Rscratch);
  cmp(Rscratch, G0);
  // invert icc.zf and goto done
  // A slightly better v8+/v9 idiom would be the following:
  //   movrnz Rscratch,1,Rscratch
  //   ba done
  //   xorcc Rscratch,1,G0
  // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
  brx(Assembler::notZero, false, Assembler::pt, done);
  delayed()->cmp(G0, G0);
  br(Assembler::always, false, Assembler::pt, done);
  delayed()->cmp(G0, 1);

  bind   (LStacked);
  // Consider: we could replace the expensive CAS in the exit
  // path with a simple ST of the displaced mark value fetched from
  // the on-stack basiclock box.  That admits a race where a thread T2
  // in the slow lock path -- inflating with monitor M -- could race a
  // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
  // More precisely T1 in the stack-lock unlock path could "stomp" the
  // inflated mark value M installed by T2, resulting in an orphan
  // object monitor M and T2 becoming stranded.  We can remedy that situation
  // by having T2 periodically poll the object's mark word using timed wait
  // operations.  If T2 discovers that a stomp has occurred it vacates
  // the monitor M and wakes any other threads stranded on the now-orphan M.
  // In addition the monitor scavenger, which performs deflation,
  // would also need to check for orphan monitors and stranded threads.
  //
  // Finally, inflation is also used when T2 needs to assign a hashCode
  // to O and O is stack-locked by T1.  The "stomp" race could cause
  // an assigned hashCode value to be lost.  We can avoid that condition
  // and provide the necessary hashCode stability invariants by ensuring
  // that hashCode generation is idempotent between copying GCs.
  // For example we could compute the hashCode of an object O as
  // O's heap address XOR some high quality RNG value that is refreshed
  // at GC-time.  The monitor scavenger would install the hashCode
  // found in any orphan monitors.  Again, the mechanism admits a
  // lost-update "stomp" WAW race but detects and recovers as needed.
  //
  // A prototype implementation showed excellent results, although
  // the scavenger and timeout code was rather involved.

  // CAS obj->mark from box (Rbox) back to the displaced header (Rscratch);
  // the compare leaves icc.zf set iff the CAS observed Rbox in the mark.
  cas_ptr(mark_addr.base(), Rbox, Rscratch);
  cmp(Rbox, Rscratch);
  // Intentional fall through into done ...

  bind(done);
}

// In ASSERT builds, and only when both UseTLAB and VerifyOops are set, emits
// checks on the current thread's TLAB pointers: top >= start, top <= end, and
// that none of top/start/end has any MinObjAlignmentInBytesMask bit set.
// A failed check reaches STOP with the corresponding message.
// The checks run inside their own register window (save_frame/restore) and
// use L0-L2 as temps. In non-ASSERT builds nothing is emitted.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, next2, ok;
    Register t1 = L0;
    Register t2 = L1;
    Register t3 = L2;

    save_frame(0);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
    or3(t1, t2, t3);                      // t3 accumulates top | start (| end below)
    cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
    STOP("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
    or3(t3, t2, t3);
    cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
    STOP("assert(top <= end)");
    should_not_reach_here();

    bind(next2);
    // Any alignment-mask bit set in top, start or end makes t3 non-zero.
    and3(t3, MinObjAlignmentInBytesMask, t3);
    cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
    STOP("assert(aligned)");
    should_not_reach_here();

    bind(ok);
    restore();
  }
#endif
}


// Emits an allocation attempt against the heap's shared top/end pointers.
// If the heap does not support inline contiguous allocation, emits only an
// unconditional branch to slow_case. Otherwise emits a retry loop that
// computes the new top and installs it with a CAS; if there is not enough
// free space the code branches to slow_case.
// Either var_size_in_bytes (when valid) or con_size_in_bytes gives the size.
void MacroAssembler::eden_allocate(
  Register obj,                        // result: pointer to object after successful allocation
  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes,          // object size in bytes if   known at compile time
  Register t1,                         // temp register
  Register t2,                         // temp register
  Label&   slow_case                   // continuation point if fast allocation fails
){
  // make sure arguments make sense
  assert_different_registers(obj, var_size_in_bytes, t1, t2);
  assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");

  if (!Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    ba(slow_case);
    delayed()->nop();
  } else {
    // get eden boundaries
    // note: we need both top & top_addr!
    const Register top_addr = t1;
    const Register end      = t2;

    CollectedHeap* ch = Universe::heap();
    set((intx)ch->top_addr(), top_addr);
    // end is loaded relative to top_addr, so only one address constant is materialized.
    intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
    ld_ptr(top_addr, delta, end);
    ld_ptr(top_addr, 0, obj);

    // try to allocate
    Label retry;
    bind(retry);
#ifdef ASSERT
    // make sure eden top is properly aligned
    {
      Label L;
      btst(MinObjAlignmentInBytesMask, obj);
      br(Assembler::zero, false, Assembler::pt, L);
      delayed()->nop();
      STOP("eden top is not properly aligned");
      bind(L);
    }
#endif // ASSERT
    // 'free' aliases 'end' (t2): after the sub, t2 holds the free byte count
    // until the delay-slot add below overwrites it with the new top.
    const Register free = end;
    sub(end, obj, free);                                   // compute amount of free space
    if (var_size_in_bytes->is_valid()) {
      // size is unknown at compile time
      cmp(free, var_size_in_bytes);
      brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
      delayed()->add(obj, var_size_in_bytes, end);
    } else {
      // size is known at compile time
      cmp(free, con_size_in_bytes);
      brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
      delayed()->add(obj, con_size_in_bytes, end);
    }
    // Compare obj with the value at top_addr; if still equal, swap the value of
    // end with the value at top_addr. If not equal, read the value at top_addr
    // into end.
    cas_ptr(top_addr, obj, end);
    // if someone beat us on the allocation, try again, otherwise continue
    cmp(obj, end);
    brx(Assembler::notEqual, false, Assembler::pn, retry);
    delayed()->mov(end, obj);                              // nop if successful since obj == end

#ifdef ASSERT
    // make sure eden top is properly aligned
    {
      Label L;
      const Register top_addr = t1;

      set((intx)ch->top_addr(), top_addr);
      ld_ptr(top_addr, 0, top_addr);
      btst(MinObjAlignmentInBytesMask, top_addr);
      br(Assembler::zero, false, Assembler::pt, L);
      delayed()->nop();
      STOP("eden top is not properly aligned");
      bind(L);
    }
#endif // ASSERT
  }
}


// Emits a bump-pointer allocation from the current thread's TLAB: loads
// tlab top into obj, branches to slow_case when (end - top) is less than the
// requested size, otherwise stores top + size back as the new tlab top.
// var_size_in_bytes == noreg selects the compile-time constant size.
// verify_tlab() is emitted before and after the allocation sequence.
void MacroAssembler::tlab_allocate(
  Register obj,                        // result: pointer to object after successful allocation
  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes,          // object size in bytes if   known at compile time
  Register t1,                         // temp register
  Label&   slow_case                   // continuation point if fast allocation fails
){
  // make sure arguments make sense
  assert_different_registers(obj, var_size_in_bytes, t1);
  assert(0 <= con_size_in_bytes && is_simm13(con_size_in_bytes), "illegal object size");
  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");

  const Register free  = t1;

  verify_tlab();

  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), obj);

  // calculate amount of free space
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), free);
  sub(free, obj, free);

  // NOTE(review): 'done' is bound below but no branch in this function targets it.
  Label done;
  if (var_size_in_bytes == noreg) {
    cmp(free, con_size_in_bytes);
  } else {
    cmp(free, var_size_in_bytes);
  }
  // NOTE(review): this uses br/less whereas eden_allocate uses brx/lessUnsigned
  // for the equivalent free-space check - confirm the difference is intended.
  br(Assembler::less, false, Assembler::pn, slow_case);
  // calculate the new top pointer
  // (the add sits in the branch delay slot; from here on 'free' holds the new top)
  if (var_size_in_bytes == noreg) {
    delayed()->add(obj, con_size_in_bytes, free);
  } else {
    delayed()->add(obj, var_size_in_bytes, free);
  }

  bind(done);

#ifdef ASSERT
  // make sure new free pointer is properly aligned
  {
    Label L;
    btst(MinObjAlignmentInBytesMask, free);
    br(Assembler::zero, false, Assembler::pt, L);
    delayed()->nop();
    STOP("updated TLAB free is not properly aligned");
    bind(L);
  }
#endif // ASSERT

  // update the tlab top pointer
  st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
  verify_tlab();
}

// Emits a loop that zeroes memory at base + index, walking index downward.
// Each iteration subtracts HeapWordSize from index and, while the result is
// still >= 0, stores G0 at base + index from the annulled delay slot.
// index is clobbered. Presumably index is a byte count that is a multiple of
// HeapWordSize on entry - confirm against callers.
void MacroAssembler::zero_memory(Register base, Register index) {
  assert_different_registers(base, index);
  Label loop;
  bind(loop);
  subcc(index, HeapWordSize, index);
  brx(Assembler::greaterEqual, true, Assembler::pt, loop);
  delayed()->st_ptr(G0, base, index);
}

// Emits code adding size_in_bytes (register or constant) to the current
// thread's allocated-bytes field. t1 is the accumulator and must be a global
// register; t2 is handed to ensure_simm13_or_reg as its temp.
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
                                          Register t1, Register t2) {
  // Bump total bytes allocated by this thread
  assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
  assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2);
  // v8 support has gone the way of the dodo
  ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
  add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1);
  stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
}

// Returns the condition that is the logical opposite of cond.
// Any value not listed in the switch reaches ShouldNotReachHere().
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::never:                return Assembler::always;
    case Assembler::zero:                 return Assembler::notZero;
    case Assembler::lessEqual:            return Assembler::greater;
    case Assembler::less:                 return Assembler::greaterEqual;
    case Assembler::lessEqualUnsigned:    return Assembler::greaterUnsigned;
    case Assembler::lessUnsigned:         return Assembler::greaterEqualUnsigned;
    case Assembler::negative:             return Assembler::positive;
    case Assembler::overflowSet:          return Assembler::overflowClear;
    case Assembler::always:               return Assembler::never;
    case Assembler::notZero:              return Assembler::zero;
    case Assembler::greater:              return Assembler::lessEqual;
    case Assembler::greaterEqual:         return Assembler::less;
    case Assembler::greaterUnsigned:      return Assembler::lessEqualUnsigned;
    case Assembler::greaterEqualUnsigned: return Assembler::lessUnsigned;
    case Assembler::positive:             return Assembler::negative;
    case Assembler::overflowClear:        return Assembler::overflowSet;
  }

  // Unreachable for valid conditions; the return only satisfies the compiler.
  ShouldNotReachHere(); return Assembler::overflowClear;
}

// Emits code that increments the counter at counter_ptr only when cond holds
// for the current condition codes: a branch on the negated condition skips
// over the inc_counter sequence. Rtmp1 and Rtmp2 are clobbered when the
// increment executes.
void MacroAssembler::cond_inc(Assembler::Condition cond, address counter_ptr,
                              Register Rtmp1, Register Rtmp2 /*, Register Rtmp3, Register Rtmp4 */) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  brx(negated_cond, false, Assembler::pt, L);
  delayed()->nop();
  inc_counter(counter_ptr, Rtmp1, Rtmp2);
  bind(L);
}

// Emits a load / increment / store of the counter at counter_addr, using
// Rtmp1 for the address and Rtmp2 for the value. No atomic instruction is
// emitted here, so the update is a plain read-modify-write.
void MacroAssembler::inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2) {
  AddressLiteral addrlit(counter_addr);
  sethi(addrlit, Rtmp1);                 // Move hi22 bits into temporary register.
  Address addr(Rtmp1, addrlit.low10());  // Build an address with low10 bits.
  ld(addr, Rtmp2);
  inc(Rtmp2);
  st(Rtmp2, addr);
}

// Convenience overload taking an int* counter; forwards to the address version.
void MacroAssembler::inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2) {
  inc_counter((address) counter_addr, Rtmp1, Rtmp2);
}

// Scoped helper: the constructor emits a load of the byte flag at flag_addr
// into temp, tests it, and branches on 'condition' to an internal label that
// the destructor binds. Code emitted while the object is alive is therefore
// skipped at run time when 'condition' holds for the tested flag.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, Register temp, const bool* flag_addr,
    Assembler::Condition condition) {
  _masm = masm;
  AddressLiteral flag(flag_addr);
  _masm->sethi(flag, temp);
  _masm->ldub(temp, flag.low10(), temp);
  _masm->tst(temp);
  _masm->br(condition, false, Assembler::pt, _label);
  _masm->delayed()->nop();
}

// Binds the skip target created by the constructor.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}

// Emits a single store of G0 at SP + STACK_BIAS - offset.
// offset must be positive; G3_scratch is clobbered.
void MacroAssembler::bang_stack_with_offset(int offset) {
  // stack grows down, caller passes positive offset
  assert(offset > 0, "must bang with negative offset");
  set((-offset)+STACK_BIAS, G3_scratch);
  st(G0, SP, G3_scratch);
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tsp and scratch.
// Emits a run-time loop that stores G0 one page below the moving temp stack
// pointer for every page of Rsize, followed by an unrolled sequence of stores
// for the shadow pages.
//   Rsize    - size to bang, in bytes; decremented by a page per iteration (clobbered)
//   Rtsp     - temp stack pointer, initialized from SP (clobbered)
//   Rscratch - scratch; also used as the page-size register Roffset (clobbered)
void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp,
                                     Register Rscratch) {
  // Use stack pointer in temp stack pointer
  mov(SP, Rtsp);

  // Bang stack for total size given plus stack shadow page size.
  // Bang one page at a time because a large size can overflow yellow and
  // red zones (the bang will fail but stack overflow handling can't tell that
  // it was a stack overflow bang vs a regular segv).
  int offset = os::vm_page_size();
  // Roffset aliases Rscratch: the register first holds the store offset and is
  // then reloaded with the page size on every iteration.
  Register Roffset = Rscratch;

  Label loop;
  bind(loop);
  set((-offset)+STACK_BIAS, Rscratch);
  st(G0, Rtsp, Rscratch);
  set(offset, Roffset);
  sub(Rsize, Roffset, Rsize);
  cmp(Rsize, G0);
  br(Assembler::greater, false, Assembler::pn, loop);
  delayed()->sub(Rtsp, Roffset, Rtsp);

  // Bang down shadow pages too.
  // At this point, (tmp-0) is the last address touched, so don't
  // touch it again.  (It was touched as (tmp-pagesize) but then tmp
  // was post-decremented.)  Skip this address by starting at i=1, and
  // touch a few more pages below.  N.B.  It is important to touch all
  // the way down to and including i=StackShadowPages.
  // NOTE(review): the loop bound below is exclusive ('<') while the comment
  // above says "including" - confirm which is intended.
  for (int i = 1; i < StackOverflow::stack_shadow_zone_size() / os::vm_page_size(); i++) {
    set((-i*offset)+STACK_BIAS, Rscratch);
    st(G0, Rtsp, Rscratch);
  }
}

// Emits a check of SP against the thread's reserved-stack-activation value.
// When SP is (unsigned) below that value the check is skipped; otherwise the
// emitted code calls SharedRuntime::enable_stack_reserved_zone and jumps to
// the throw_delayed_StackOverflowError stub, with 'restore' in the delay slot.
// G4_scratch is clobbered.
void MacroAssembler::reserved_stack_check() {
  // testing if reserved zone needs to be enabled
  Label no_reserved_zone_enabling;

  ld_ptr(G2_thread, JavaThread::reserved_stack_activation_offset(), G4_scratch);
  cmp_and_brx_short(SP, G4_scratch, Assembler::lessUnsigned, Assembler::pt, no_reserved_zone_enabling);

  call_VM_leaf(L0, CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), G2_thread);

  AddressLiteral stub(StubRoutines::throw_delayed_StackOverflowError_entry());
  jump_to(stub, G4_scratch);
  delayed()->restore();

  should_not_reach_here();

  bind(no_reserved_zone_enabling);
}
// ((OopHandle)result).resolve();
// Emits a T_OBJECT / IN_NATIVE load from the address in 'result' back into
// 'result' (in-place dereference of the handle); tmp is passed through to
// access_load_at.
void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
  // OopHandle::resolve is an indirection.
  access_load_at(T_OBJECT, IN_NATIVE, Address(result, 0), result, tmp);
}

// Emits the load chain method -> ConstMethod -> ConstantPool -> pool holder
// -> java mirror handle, then resolves the handle. The result is left in
// 'mirror', which also serves as the intermediate register for every step.
void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ld_ptr(method, in_bytes(Method::const_offset()), mirror);
  ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
  ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
  ld_ptr(mirror, mirror_offset, mirror);
  resolve_oop_handle(mirror, tmp);
}

// Emits a load of the klass pointer of src_oop into 'klass'. With
// UseCompressedClassPointers the field is loaded with lduw and then decoded
// in place; otherwise it is loaded with ld_ptr.
void MacroAssembler::load_klass(Register src_oop, Register klass) {
  // The number of bytes in this code is used by
  // MachCallDynamicJavaNode::ret_addr_offset()
  // if this changes, change that.
  if (UseCompressedClassPointers) {
    lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass);
    decode_klass_not_null(klass);
  } else {
    ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass);
  }
}

// Emits a store of 'klass' into the klass field of dst_oop. With
// UseCompressedClassPointers the value is encoded in place first, so the
// 'klass' register is modified by the emitted code.
void MacroAssembler::store_klass(Register klass, Register dst_oop) {
  if (UseCompressedClassPointers) {
    assert(dst_oop != klass, "not enough registers");
    encode_klass_not_null(klass);
    st(klass, dst_oop, oopDesc::klass_offset_in_bytes());
  } else {
    st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes());
  }
}

// With UseCompressedClassPointers, emits a store of s into the klass-gap
// field of the object in d; otherwise emits nothing.
void MacroAssembler::store_klass_gap(Register s, Register d) {
  if (UseCompressedClassPointers) {
    assert(s != d, "not enough registers");
    st(s, d, oopDesc::klass_gap_offset_in_bytes());
  }
}

// Emits a store of src to dst through the active barrier set's assembler.
// Decorators are normalized with AccessInternal::decorator_fixup. With AS_RAW
// set, the base-class BarrierSetAssembler::store_at is called explicitly
// (non-virtually) instead of the barrier set's own override.
void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                     Register src, Address dst, Register tmp) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  decorators = AccessInternal::decorator_fixup(decorators);
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::store_at(this, decorators, type, src, dst, tmp);
  } else {
    bs->store_at(this, decorators, type, src, dst, tmp);
  }
}

// Load counterpart of access_store_at: emits a load from src into dst through
// the active barrier set's assembler, using the base-class load_at when
// AS_RAW is set.
void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
                                    Address src, Register dst, Register tmp) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  decorators = AccessInternal::decorator_fixup(decorators);
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::load_at(this, decorators, type, src, dst, tmp);
  } else {
    bs->load_at(this, decorators, type, src, dst, tmp);
  }
}

// load_heap_oop overloads: each emits a T_OBJECT load with IN_HEAP added to
// the caller's decorators; they differ only in how the source address is given.

// Source given as an Address.
void MacroAssembler::load_heap_oop(const Address& s, Register d, Register tmp, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, s, d, tmp);
}

// Source given as base register + index register.
void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d, Register tmp, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2), d, tmp);
}

// Source given as base register + immediate displacement.
void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d, Register tmp, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, simm13a), d, tmp);
}

// Source given as base register + RegisterOrConstant; dispatches on which
// form s2 holds.
void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d, Register tmp, DecoratorSet decorators) {
  if (s2.is_constant()) {
    access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2.as_constant()), d, tmp);
  } else {
    access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2.as_register()), d, tmp);
  }
}

// store_heap_oop overloads: each emits a T_OBJECT store of d with IN_HEAP
// added to the caller's decorators; they differ only in the destination form.

// Destination given as base register + index register.
void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2, Register tmp, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(s1, s2), tmp);
}

// Destination given as base register + immediate displacement.
void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a, Register tmp, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(s1, simm13a), tmp);
}

// Destination given as an Address plus an extra offset. An indexed Address is
// only supported without a displacement and with offset == 0 (asserted).
void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset, Register tmp, DecoratorSet decorators) {
  if (a.has_index()) {
    assert(!a.has_disp(), "not supported yet");
    assert(offset == 0, "not supported yet");
    access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(a.base(), a.index()), tmp);
  } else {
    access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(a.base(), a.disp() + offset), tmp);
  }
}


// Emits code compressing the (possibly null) oop in src into dst.
// With a NULL heap base this is just a right shift. Otherwise the heap base
// (G6_heapbase) is subtracted before the shift, but only for a non-null src,
// so that a null oop encodes to 0.
void MacroAssembler::encode_heap_oop(Register src, Register dst) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  verify_oop(src);
  if (CompressedOops::base() == NULL) {
    srlx(src, LogMinObjAlignmentInBytes, dst);
    return;
  }
  Label done;
  if (src == dst) {
    // optimize for frequent case src == dst
    bpr(rc_nz, true, Assembler::pt, src, done);
    delayed() -> sub(src, G6_heapbase, dst); // annulled if not taken
    bind(done);
    srlx(src, LogMinObjAlignmentInBytes, dst);
  } else {
    bpr(rc_z, false, Assembler::pn, src, done);
    delayed() -> mov(G0, dst);
    // could be moved before the branch, annulling the delay slot,
    // but may add some unneeded work decoding null
    sub(src, G6_heapbase, dst);
    srlx(dst, LogMinObjAlignmentInBytes, dst);
    bind(done);
  }
}


// In-place variant for an oop known to be non-null: subtracts the heap base
// (when one is in use) and shifts, with no null check emitted.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  verify_oop(r);
  if (CompressedOops::base() != NULL)
    sub(r, G6_heapbase, r);
  srlx(r, LogMinObjAlignmentInBytes, r);
}

// Two-register variant for an oop known to be non-null: dst receives the
// compressed form of src; src is left unmodified when src != dst.
void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  verify_oop(src);
  if (CompressedOops::base() == NULL) {
    srlx(src, LogMinObjAlignmentInBytes, dst);
  } else {
    sub(src, G6_heapbase, dst);
    srlx(dst, LogMinObjAlignmentInBytes, dst);
  }
}

// Same algorithm as oops.inline.hpp decode_heap_oop.
// Emits code expanding the (possibly null) compressed oop in src into dst:
// shift left, then, when a heap base is in use, add G6_heapbase only if the
// shifted value is non-zero so that 0 decodes to null. The decoded value is
// then passed to verify_oop.
void MacroAssembler::decode_heap_oop(Register src, Register dst) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  sllx(src, LogMinObjAlignmentInBytes, dst);
  if (CompressedOops::base() != NULL) {
    Label done;
    bpr(rc_nz, true, Assembler::pt, dst, done);
    delayed() -> add(dst, G6_heapbase, dst); // annulled if not taken
    bind(done);
  }
  verify_oop(dst);
}

// In-place decode of a compressed oop known to be non-null: shift left and,
// when a heap base is in use, add G6_heapbase unconditionally.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
  // pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  sllx(r, LogMinObjAlignmentInBytes, r);
  if (CompressedOops::base() != NULL)
    add(r, G6_heapbase, r);
}

// Two-register decode of a compressed oop known to be non-null; dst receives
// the decoded pointer. Unlike the one-register form, this overload does not
// assert that Universe::heap() is initialized.
void MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
  // pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert (UseCompressedOops, "must be compressed");
  assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  sllx(src, LogMinObjAlignmentInBytes, dst);
  if (CompressedOops::base() != NULL)
    add(dst, G6_heapbase, dst);
}

// In-place encode of a non-null klass pointer. When a compressed-klass base
// is in use, G6_heapbase is temporarily loaded with that base for the
// subtraction and restored afterwards with reinit_heapbase(); r must
// therefore not be G6_heapbase.
void MacroAssembler::encode_klass_not_null(Register r) {
  assert (UseCompressedClassPointers, "must be compressed");
  if (CompressedKlassPointers::base() != NULL) {
    assert(r != G6_heapbase, "bad register choice");
    set((intptr_t)CompressedKlassPointers::base(), G6_heapbase);
    sub(r, G6_heapbase, r);
    if (CompressedKlassPointers::shift() != 0) {
      assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
      srlx(r, LogKlassAlignmentInBytes, r);
    }
    reinit_heapbase();
  } else {
    assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift() || CompressedKlassPointers::shift() == 0, "decode alg wrong");
    srlx(r, CompressedKlassPointers::shift(), r);
  }
}

// Two-register encode of a non-null klass pointer. With src == dst this
// defers to the in-place form; otherwise dst itself holds the base during the
// subtraction, so G6_heapbase is left untouched.
void MacroAssembler::encode_klass_not_null(Register src, Register dst) {
  if (src == dst) {
    encode_klass_not_null(src);
  } else {
    assert (UseCompressedClassPointers, "must be compressed");
    if (CompressedKlassPointers::base() != NULL) {
      set((intptr_t)CompressedKlassPointers::base(), dst);
      sub(src, dst, dst);
      if (CompressedKlassPointers::shift() != 0) {
        srlx(dst, LogKlassAlignmentInBytes, dst);
      }
    } else {
      // shift src into dst
      assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift() || CompressedKlassPointers::shift() == 0, "decode alg wrong");
      srlx(src, CompressedKlassPointers::shift(), dst);
    }
  }
}

// Function instr_size_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null() and reinit_heapbase().  Hence, if
// the instructions they generate change, then this method needs to be updated.
+int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedClassPointers, "only for compressed klass ptrs"); + int num_instrs = 1; // shift src,dst or add + if (CompressedKlassPointers::base() != NULL) { + // set + add + set + num_instrs += insts_for_internal_set((intptr_t)CompressedKlassPointers::base()) + + insts_for_internal_set((intptr_t)CompressedOops::ptrs_base()); + if (CompressedKlassPointers::shift() != 0) { + num_instrs += 1; // sllx + } + } + return num_instrs * BytesPerInstWord; +} + +// !!! If the instructions that get generated here change then function +// instr_size_for_decode_klass_not_null() needs to get updated. +void MacroAssembler::decode_klass_not_null(Register r) { + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + assert (UseCompressedClassPointers, "must be compressed"); + if (CompressedKlassPointers::base() != NULL) { + assert(r != G6_heapbase, "bad register choice"); + set((intptr_t)CompressedKlassPointers::base(), G6_heapbase); + if (CompressedKlassPointers::shift() != 0) + sllx(r, LogKlassAlignmentInBytes, r); + add(r, G6_heapbase, r); + reinit_heapbase(); + } else { + assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift() || CompressedKlassPointers::shift() == 0, "decode alg wrong"); + sllx(r, CompressedKlassPointers::shift(), r); + } +} + +void MacroAssembler::decode_klass_not_null(Register src, Register dst) { + if (src == dst) { + decode_klass_not_null(src); + } else { + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. 
+ assert (UseCompressedClassPointers, "must be compressed"); + if (CompressedKlassPointers::base() != NULL) { + if (CompressedKlassPointers::shift() != 0) { + assert((src != G6_heapbase) && (dst != G6_heapbase), "bad register choice"); + set((intptr_t)CompressedKlassPointers::base(), G6_heapbase); + sllx(src, LogKlassAlignmentInBytes, dst); + add(dst, G6_heapbase, dst); + reinit_heapbase(); + } else { + set((intptr_t)CompressedKlassPointers::base(), dst); + add(src, dst, dst); + } + } else { + // shift/mov src into dst. + assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift() || CompressedKlassPointers::shift() == 0, "decode alg wrong"); + sllx(src, CompressedKlassPointers::shift(), dst); + } + } +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedClassPointers) { + if (Universe::heap() != NULL) { + set((intptr_t)CompressedOops::ptrs_base(), G6_heapbase); + } else { + AddressLiteral base(CompressedOops::ptrs_base_addr()); + load_ptr_contents(base, G6_heapbase); + } + } +} + +// Use BIS for zeroing (count is in bytes). +void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) { + assert(UseBlockZeroing && VM_Version::has_blk_zeroing(), "only works with BIS zeroing"); + Register end = count; + int cache_line_size = VM_Version::prefetch_data_size(); + assert(cache_line_size > 0, "cache line size should be known for this code"); + // Minimum count when BIS zeroing can be used since + // it needs membar which is expensive. + int block_zero_size = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit); + + Label small_loop; + // Check if count is negative (dead code) or zero. + // Note, count uses 64bit in 64 bit VM. + cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone); + + // Use BIS zeroing only for big arrays since it requires membar. 
+ if (Assembler::is_simm13(block_zero_size)) { // < 4096 + cmp(count, block_zero_size); + } else { + set(block_zero_size, temp); + cmp(count, temp); + } + br(Assembler::lessUnsigned, false, Assembler::pt, small_loop); + delayed()->add(to, count, end); + + // Note: size is >= three (32 bytes) cache lines. + + // Clean the beginning of space up to next cache line. + for (int offs = 0; offs < cache_line_size; offs += 8) { + stx(G0, to, offs); + } + + // align to next cache line + add(to, cache_line_size, to); + and3(to, -cache_line_size, to); + + // Note: size left >= two (32 bytes) cache lines. + + // BIS should not be used to zero tail (64 bytes) + // to avoid zeroing a header of the following object. + sub(end, (cache_line_size*2)-8, end); + + Label bis_loop; + bind(bis_loop); + stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY); + add(to, cache_line_size, to); + cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop); + + // BIS needs membar. + membar(Assembler::StoreLoad); + + add(end, (cache_line_size*2)-8, end); // restore end + cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone); + + // Clean the tail. + bind(small_loop); + stx(G0, to, 0); + add(to, 8, to); + cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); + nop(); // Separate short branches +} + +/** + * Update CRC-32[C] with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. 
+ * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xor3(val, crc, val); + and3(val, 0xFF, val); + sllx(val, 2, val); + lduw(table, val, val); + srlx(crc, 8, crc); + xor3(val, crc, crc); +} + +// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros +void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) { + srlx(src, 24, dst); // bits 31..24 -> bits 7..0 + + sllx(src, 32+8, tmp); + srlx(tmp, 32+24, tmp); // isolate bits 23..16 + sllx(tmp, 8, tmp); // ... and place them at bits 15..8 + or3(dst, tmp, dst); + + sllx(src, 32+16, tmp); + srlx(tmp, 32+24, tmp); // isolate bits 15..8 + sllx(tmp, 16, tmp); // ... and place them at bits 23..16 + or3(dst, tmp, dst); + + sllx(src, 32+24, tmp); + srlx(tmp, 32, tmp); // bits 7..0 -> bits 31..24 + or3(dst, tmp, dst); +} + +void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) { + reverse_bytes_32(src, tmp1, tmp2); + movxtod(tmp1, dst); +} + +void MacroAssembler::movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2) { + movdtox(src, tmp1); + reverse_bytes_32(tmp1, dst, tmp2); +} + +void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset) { + xmulx(xcrc_hi, xK_hi, xtmp_lo); + xmulxhi(xcrc_hi, xK_hi, xtmp_hi); + xmulxhi(xcrc_lo, xK_lo, xcrc_hi); + xmulx(xcrc_lo, xK_lo, xcrc_lo); + xor3(xcrc_lo, xtmp_lo, xcrc_lo); + xor3(xcrc_hi, xtmp_hi, xcrc_hi); + ldxl(buf, G0, xtmp_lo); // NOTE: 'offset' is never read; the load address is always buf itself + inc(buf, 8); + ldxl(buf, G0, xtmp_hi); + inc(buf, 8); // buf has now been advanced by 16 bytes + xor3(xcrc_lo, xtmp_lo, xcrc_lo); + xor3(xcrc_hi, xtmp_hi, xcrc_hi); +} + +void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo) { + mov(xcrc_lo, xtmp_lo); + mov(xcrc_hi, xtmp_hi); + xmulx(xtmp_hi, xK_hi, xtmp_lo); + xmulxhi(xtmp_hi, xK_hi, xtmp_hi); + xmulxhi(xcrc_lo, xK_lo, xcrc_hi); + xmulx(xcrc_lo, 
xK_lo, xcrc_lo); + xor3(xcrc_lo, xbuf_lo, xcrc_lo); + xor3(xcrc_hi, xbuf_hi, xcrc_hi); + xor3(xcrc_lo, xtmp_lo, xcrc_lo); + xor3(xcrc_hi, xtmp_hi, xcrc_hi); +} + +void MacroAssembler::fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp) { + and3(xcrc, 0xFF, tmp); + sllx(tmp, 2, tmp); + lduw(table, tmp, xtmp); + srlx(xcrc, 8, xcrc); + xor3(xtmp, xcrc, xcrc); +} + +void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { + and3(crc, 0xFF, tmp); + srlx(crc, 8, crc); + sllx(tmp, 2, tmp); + lduw(table, tmp, tmp); + xor3(tmp, crc, crc); +} + +#define CRC32_TMP_REG_NUM 18 + +#define CRC32_CONST_64 0x163cd6124 +#define CRC32_CONST_96 0x0ccaa009e +#define CRC32_CONST_160 0x1751997d0 +#define CRC32_CONST_480 0x1c6e41596 +#define CRC32_CONST_544 0x154442bd4 + +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table) { + + Label L_cleanup_loop, L_cleanup_check, L_align_loop, L_align_check; + Label L_main_loop_prologue; + Label L_fold_512b, L_fold_512b_loop, L_fold_128b; + Label L_fold_tail, L_fold_tail_loop; + Label L_8byte_fold_check; + + const Register tmp[CRC32_TMP_REG_NUM] = {L0, L1, L2, L3, L4, L5, L6, G1, I0, I1, I2, I3, I4, I5, I7, O4, O5, G3}; + + Register const_64 = tmp[CRC32_TMP_REG_NUM-1]; + Register const_96 = tmp[CRC32_TMP_REG_NUM-1]; + Register const_160 = tmp[CRC32_TMP_REG_NUM-2]; + Register const_480 = tmp[CRC32_TMP_REG_NUM-1]; + Register const_544 = tmp[CRC32_TMP_REG_NUM-2]; + + set(ExternalAddress(StubRoutines::crc_table_addr()), table); + + not1(crc); // ~c + clruwu(crc); // clear upper 32 bits of crc + + // Check if below cutoff, proceed directly to cleanup code + mov(31, G4); + cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check); + + // Align buffer to 8 byte boundary + mov(8, O5); + and3(buf, 0x7, O4); + sub(O5, O4, O5); + and3(O5, 0x7, O5); + sub(len, O5, len); + ba(L_align_check); + delayed()->nop(); + + // Alignment loop, table look 
up method for up to 7 bytes + bind(L_align_loop); + ldub(buf, 0, O4); + inc(buf); + dec(O5); + xor3(O4, crc, O4); + and3(O4, 0xFF, O4); + sllx(O4, 2, O4); + lduw(table, O4, O4); + srlx(crc, 8, crc); + xor3(O4, crc, crc); + bind(L_align_check); + nop(); + cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_align_loop); + + // Aligned on 64-bit (8-byte) boundary at this point + // Check if still above cutoff (31-bytes) + mov(31, G4); + cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check); + // At least 32 bytes left to process + + // Free up registers by storing them to FP registers + for (int i = 0; i < CRC32_TMP_REG_NUM; i++) { + movxtod(tmp[i], as_FloatRegister(2*i)); + } + + // Determine which loop to enter + // Shared prologue + ldxl(buf, G0, tmp[0]); + inc(buf, 8); + ldxl(buf, G0, tmp[1]); + inc(buf, 8); + xor3(tmp[0], crc, tmp[0]); // Fold CRC into first few bytes + and3(crc, 0, crc); // Clear out the crc register + // Main loop needs 128-bytes at least + mov(128, G4); + mov(64, tmp[2]); + cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_main_loop_prologue); + // Less than 64 bytes + nop(); + cmp_and_br_short(len, tmp[2], Assembler::lessUnsigned, Assembler::pt, L_fold_tail); + // Between 64 and 127 bytes + set64(CRC32_CONST_96, const_96, tmp[8]); + set64(CRC32_CONST_160, const_160, tmp[9]); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[4], tmp[5], buf, 16); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[6], tmp[7], buf, 32); + dec(len, 48); + ba(L_fold_tail); + delayed()->nop(); + + bind(L_main_loop_prologue); + for (int i = 2; i < 8; i++) { + ldxl(buf, G0, tmp[i]); + inc(buf, 8); + } + + // Fold total 512 bits of polynomial on each iteration, + // 128 bits per each of 4 parallel streams + set64(CRC32_CONST_480, const_480, tmp[8]); + set64(CRC32_CONST_544, const_544, tmp[9]); + + 
mov(128, G4); + bind(L_fold_512b_loop); + fold_128bit_crc32(tmp[1], tmp[0], const_480, const_544, tmp[9], tmp[8], buf, 0); + fold_128bit_crc32(tmp[3], tmp[2], const_480, const_544, tmp[11], tmp[10], buf, 16); + fold_128bit_crc32(tmp[5], tmp[4], const_480, const_544, tmp[13], tmp[12], buf, 32); + fold_128bit_crc32(tmp[7], tmp[6], const_480, const_544, tmp[15], tmp[14], buf, 64); + dec(len, 64); + cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_512b_loop); + + // Fold 512 bits to 128 bits + bind(L_fold_512b); + set64(CRC32_CONST_96, const_96, tmp[8]); + set64(CRC32_CONST_160, const_160, tmp[9]); + + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[3], tmp[2]); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[5], tmp[4]); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[7], tmp[6]); + dec(len, 48); + + // Fold the rest of 128 bits data chunks + bind(L_fold_tail); + mov(32, G4); + cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_fold_128b); + + set64(CRC32_CONST_96, const_96, tmp[8]); + set64(CRC32_CONST_160, const_160, tmp[9]); + + bind(L_fold_tail_loop); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0); + sub(len, 16, len); + cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_tail_loop); + + // Fold the 128 bits in tmps 0 - 1 into tmp 1 + bind(L_fold_128b); + + set64(CRC32_CONST_64, const_64, tmp[4]); + + xmulx(const_64, tmp[0], tmp[2]); + xmulxhi(const_64, tmp[0], tmp[3]); + + srl(tmp[2], G0, tmp[4]); + xmulx(const_64, tmp[4], tmp[4]); + + srlx(tmp[2], 32, tmp[2]); + sllx(tmp[3], 32, tmp[3]); + or3(tmp[2], tmp[3], tmp[2]); + + xor3(tmp[4], tmp[1], tmp[4]); + xor3(tmp[4], tmp[2], tmp[1]); + dec(len, 8); + + // Use table lookup for the 8 bytes left in tmp[1] + dec(len, 8); + + // 8 8-bit folds to compute 32-bit CRC. 
+ for (int j = 0; j < 4; j++) { + fold_8bit_crc32(tmp[1], table, tmp[2], tmp[3]); + } + srl(tmp[1], G0, crc); // move 32 bits to general register + for (int j = 0; j < 4; j++) { + fold_8bit_crc32(crc, table, tmp[3]); + } + + bind(L_8byte_fold_check); + + // Restore int registers saved in FP registers + for (int i = 0; i < CRC32_TMP_REG_NUM; i++) { + movdtox(as_FloatRegister(2*i), tmp[i]); + } + + ba(L_cleanup_check); + delayed()->nop(); + + // Table look-up method for the remaining few bytes + bind(L_cleanup_loop); + ldub(buf, 0, O4); + inc(buf); + dec(len); + xor3(O4, crc, O4); + and3(O4, 0xFF, O4); + sllx(O4, 2, O4); + lduw(table, O4, O4); + srlx(crc, 8, crc); + xor3(O4, crc, crc); + bind(L_cleanup_check); + nop(); + cmp_and_br_short(len, 0, Assembler::greaterUnsigned, Assembler::pt, L_cleanup_loop); + + not1(crc); +} + +#define CHUNK_LEN 128 /* 128 x 8B = 1KB */ +#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */ +#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */ +#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */ + +void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register table) { + + Label L_crc32c_head, L_crc32c_aligned; + Label L_crc32c_parallel, L_crc32c_parallel_loop; + Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop; + Label L_crc32c_done, L_crc32c_tail, L_crc32c_return; + + set(ExternalAddress(StubRoutines::crc32c_table_addr()), table); + + cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return); + + // clear upper 32 bits of crc + clruwu(crc); + + and3(buf, 7, G4); + cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned); + + mov(8, G1); + sub(G1, G4, G4); + + // ------ process the misaligned head (7 bytes or less) ------ + bind(L_crc32c_head); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + ldub(buf, 0, G1); + 
update_byte_crc32(crc, G1, table); + + inc(buf); + dec(len); + cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return); + dec(G4); + cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head); + + // ------ process the 8-byte-aligned body ------ + bind(L_crc32c_aligned); + nop(); + cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail); + + // reverse the byte order of lower 32 bits to big endian, and move to FP side + movitof_revbytes(crc, F0, G1, G3); + + set(CHUNK_LEN*8*4, G4); + cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial); + + // ------ process four 1KB chunks in parallel ------ + bind(L_crc32c_parallel); + + fzero(FloatRegisterImpl::D, F2); + fzero(FloatRegisterImpl::D, F4); + fzero(FloatRegisterImpl::D, F6); + + mov(CHUNK_LEN - 1, G4); + bind(L_crc32c_parallel_loop); + // schedule ldf's ahead of crc32c's to hide the load-use latency + ldf(FloatRegisterImpl::D, buf, 0, F8); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14); + crc32c(F0, F8, F0); + crc32c(F2, F10, F2); + crc32c(F4, F12, F4); + crc32c(F6, F14, F6); + inc(buf, 8); + dec(G4); + cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop); + + ldf(FloatRegisterImpl::D, buf, 0, F8); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + crc32c(F0, F8, F0); + crc32c(F2, F10, F2); + crc32c(F4, F12, F4); + + inc(buf, CHUNK_LEN*24); + ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian + inc(buf, 8); + + prefetch(buf, 0, Assembler::severalReads); + prefetch(buf, CHUNK_LEN*8, Assembler::severalReads); + prefetch(buf, CHUNK_LEN*16, Assembler::severalReads); + prefetch(buf, CHUNK_LEN*24, Assembler::severalReads); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + movftoi_revbytes(F0, O4, G1, G4); 
+ movftoi_revbytes(F2, O5, G1, G4); + movftoi_revbytes(F4, G5, G1, G4); + + // combine the results of 4 chunks + set64(CHUNK_K1, G3, G1); + xmulx(O4, G3, O4); + set64(CHUNK_K2, G3, G1); + xmulx(O5, G3, O5); + set64(CHUNK_K3, G3, G1); + xmulx(G5, G3, G5); + + movdtox(F14, G4); + xor3(O4, O5, O5); + xor3(G5, O5, O5); + xor3(G4, O5, O5); + + // reverse the byte order to big endian, via stack, and move to FP side + // TODO: use new revb instruction + add(SP, -8, G1); + srlx(G1, 3, G1); + sllx(G1, 3, G1); + stx(O5, G1, G0); + ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian + + crc32c(F6, F2, F0); + + set(CHUNK_LEN*8*4, G4); + sub(len, G4, len); + cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel); + nop(); + cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done); + + bind(L_crc32c_serial); + + mov(32, G4); + cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8); + + // ------ process 32B chunks ------ + bind(L_crc32c_x32_loop); + ldf(FloatRegisterImpl::D, buf, 0, F2); + crc32c(F0, F2, F0); + ldf(FloatRegisterImpl::D, buf, 8, F2); + crc32c(F0, F2, F0); + ldf(FloatRegisterImpl::D, buf, 16, F2); + crc32c(F0, F2, F0); + ldf(FloatRegisterImpl::D, buf, 24, F2); + inc(buf, 32); + crc32c(F0, F2, F0); + dec(len, 32); + cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop); + + bind(L_crc32c_x8); + nop(); + cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done); + + // ------ process 8B chunks ------ + bind(L_crc32c_x8_loop); + ldf(FloatRegisterImpl::D, buf, 0, F2); + inc(buf, 8); + crc32c(F0, F2, F0); + dec(len, 8); + cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop); + + bind(L_crc32c_done); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + movftoi_revbytes(F0, crc, G1, G3); + + cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return); + + // ------ process 
the misaligned tail (7 bytes or less) ------ + bind(L_crc32c_tail); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + ldub(buf, 0, G1); + update_byte_crc32(crc, G1, table); + + inc(buf); + dec(len); + cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail); + + bind(L_crc32c_return); + nop(); +} diff -ur --new-file a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp --- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp 2023-04-16 11:42:11.067751202 +0000 @@ -0,0 +1,1332 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SPARC_MACROASSEMBLER_SPARC_HPP +#define CPU_SPARC_MACROASSEMBLER_SPARC_HPP + +#include "asm/assembler.hpp" +#include "utilities/macros.hpp" + +// promises that the system will not use traps 16-31 +#define ST_RESERVED_FOR_USER_0 0x10 + +class BiasedLockingCounters; + + +// Register aliases for parts of the system: + +// 64 bit values can be kept in g1-g5, o1-o5 and o7 and all 64 bits are safe +// across context switches in V8+ ABI. Of course, there are no 64 bit regs +// in V8 ABI. All 64 bits are preserved in V9 ABI for all registers. + +// g2-g4 are scratch registers called "application globals". Their +// meaning is reserved to the "compilation system"--which means us! +// They are not supposed to be touched by ordinary C code, although +// highly-optimized C code might steal them for temps. They are safe +// across thread switches, and the ABI requires that they be safe +// across function calls. +// +// g1 and g3 are touched by more modules. V8 allows g1 to be clobbered +// across func calls, and V8+ also allows g5 to be clobbered across +// func calls. Also, g1 and g5 can get touched while doing shared +// library loading. +// +// We must not touch g7 (it is the thread-self register) and g6 is +// reserved for certain tools. g0, of course, is always zero. +// +// (Sources: SunSoft Compilers Group, thread library engineers.) + +// %%%% The interpreter should be revisited to reduce global scratch regs. 
+ +// This global always holds the current JavaThread pointer: + +REGISTER_DECLARATION(Register, G2_thread , G2); +REGISTER_DECLARATION(Register, G6_heapbase , G6); // base register for compressed pointers; loaded by MacroAssembler::reinit_heapbase() + +// The following globals are part of the Java calling convention: + +REGISTER_DECLARATION(Register, G5_method , G5); +REGISTER_DECLARATION(Register, G5_megamorphic_method , G5_method); +REGISTER_DECLARATION(Register, G5_inline_cache_reg , G5_method); + +// The following globals are used for the new C1 & interpreter calling convention: +REGISTER_DECLARATION(Register, Gargs , G4); // pointing to the last argument + +// This local is used to preserve G2_thread in the interpreter and in stubs: +REGISTER_DECLARATION(Register, L7_thread_cache , L7); + +// These globals are used as scratch registers in the interpreter: + +REGISTER_DECLARATION(Register, Gframe_size , G1); // SAME REG as G1_scratch +REGISTER_DECLARATION(Register, G1_scratch , G1); // also SAME +REGISTER_DECLARATION(Register, G3_scratch , G3); +REGISTER_DECLARATION(Register, G4_scratch , G4); + +// These globals are used as short-lived scratch registers in the compiler: + +REGISTER_DECLARATION(Register, Gtemp , G5); + +// JSR 292 fixed register usages: +REGISTER_DECLARATION(Register, G5_method_type , G5); +REGISTER_DECLARATION(Register, G3_method_handle , G3); +REGISTER_DECLARATION(Register, L7_mh_SP_save , L7); + +// The compiler requires that G5_megamorphic_method is G5_inline_cache_klass, +// because a single patchable "set" instruction (NativeMovConstReg, +// or NativeMovConstPatching for compiler1) +// serves to set up either quantity, depending on whether the compiled +// call site is an inline cache or is megamorphic. See the function +// CompiledIC::set_to_megamorphic. +// +// If an inline cache targets an interpreted method, then the +// G5 register will be used twice during the call. First, +// the call site will be patched to load a compiledICHolder +// into G5. (This is an ordered pair of ic_klass, method.) 
+// The c2i adapter will first check the ic_klass, then load +// G5_method with the method part of the pair just before +// jumping into the interpreter. +// +// Note that G5_method is only the method-self for the interpreter, +// and is logically unrelated to G5_megamorphic_method. +// +// Invariants on G2_thread (the JavaThread pointer): +// - it should not be used for any other purpose anywhere +// - it must be re-initialized by StubRoutines::call_stub() +// - it must be preserved around every use of call_VM + +// We can consider using g2/g3/g4 to cache more values than the +// JavaThread, such as the card-marking base or perhaps pointers into +// Eden. It's something of a waste to use them as scratch temporaries, +// since they are not supposed to be volatile. (Of course, if we find +// that Java doesn't benefit from application globals, then we can just +// use them as ordinary temporaries.) +// +// Since g1 and g5 (and/or g6) are the volatile (caller-save) registers, +// it makes sense to use them routinely for procedure linkage, +// whenever the On registers are not applicable. Examples: G5_method, +// G5_inline_cache_klass, and a double handful of miscellaneous compiler +// stubs. This means that compiler stubs, etc., should be kept to a +// maximum of two or three G-register arguments. 
+ + +// stub frames + +REGISTER_DECLARATION(Register, Lentry_args , L0); // pointer to args passed to callee (interpreter) not stub itself + +// Interpreter frames + +REGISTER_DECLARATION(Register, Lesp , L0); // expression stack pointer +REGISTER_DECLARATION(Register, Lbcp , L1); // pointer to next bytecode +REGISTER_DECLARATION(Register, Lmethod , L2); +REGISTER_DECLARATION(Register, Llocals , L3); +REGISTER_DECLARATION(Register, Largs , L3); // pointer to locals for signature handler + // must match Llocals in asm interpreter +REGISTER_DECLARATION(Register, Lmonitors , L4); +REGISTER_DECLARATION(Register, Lbyte_code , L5); +// When calling out from the interpreter we record SP so that we can remove any extra stack +// space allocated during adapter transitions. This register is only live from the point +// of the call until we return. +REGISTER_DECLARATION(Register, Llast_SP , L5); +REGISTER_DECLARATION(Register, Lscratch , L5); +REGISTER_DECLARATION(Register, Lscratch2 , L6); +REGISTER_DECLARATION(Register, LcpoolCache , L6); // constant pool cache + +REGISTER_DECLARATION(Register, O5_savedSP , O5); +REGISTER_DECLARATION(Register, I5_savedSP , I5); // Saved SP before bumping for locals. This is simply + // a copy of SP, so in 64-bit it's a biased value. The bias + // is added and removed as needed in the frame code. +REGISTER_DECLARATION(Register, IdispatchAddress , I3); // Register which saves the dispatch address for each bytecode +REGISTER_DECLARATION(Register, ImethodDataPtr , I2); // Pointer to the current method data + +// NOTE: Lscratch2 and LcpoolCache point to the same registers in +// the interpreter code. If Lscratch2 needs to be used for some +// purpose then LcpoolCache should be restored after that for +// the interpreter to work right +// (These assignments must be compatible with L7_thread_cache; see above.) + +// Lbcp points into the middle of the method object. + +// Exception processing +// These registers are passed into exception handlers. 
+// All exception handlers require the exception object being thrown. +// In addition, an nmethod's exception handler must be passed +// the address of the call site within the nmethod, to allow +// proper selection of the applicable catch block. +// (Interpreter frames use their own bcp() for this purpose.) +// +// The Oissuing_pc value is not always needed. When jumping to a +// handler that is known to be interpreted, the Oissuing_pc value can be +// omitted. An actual catch block in compiled code receives (from its +// nmethod's exception handler) the thrown exception in the Oexception, +// but it doesn't need the Oissuing_pc. +// +// If an exception handler (either interpreted or compiled) +// discovers there is no applicable catch block, it updates +// the Oissuing_pc to the continuation PC of its own caller, +// pops back to that caller's stack frame, and executes that +// caller's exception handler. Obviously, this process will +// iterate until the control stack is popped back to a method +// containing an applicable catch block. A key invariant is +// that the Oissuing_pc value is always a value local to +// the method whose exception handler is currently executing. +// +// Note: The issuing PC value is __not__ a raw return address (I7 value). +// It is a "return pc", the address __following__ the call. +// Raw return addresses are converted to issuing PCs by frame::pc(), +// or by stubs. Issuing PCs can be used directly with PC range tables. +// +REGISTER_DECLARATION(Register, Oexception , O0); // exception being thrown +REGISTER_DECLARATION(Register, Oissuing_pc , O1); // where the exception is coming from + +// Address is an abstraction used to represent a memory location. +// +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. + +class Address { + private: + Register _base; // Base register. + RegisterOrConstant _index_or_disp; // Index register or constant displacement. 
+ RelocationHolder _rspec; + + public: + Address() : _base(noreg), _index_or_disp(noreg) {} + + Address(Register base, RegisterOrConstant index_or_disp) + : _base(base), + _index_or_disp(index_or_disp) { + } + + Address(Register base, Register index) + : _base(base), + _index_or_disp(index) { + } + + Address(Register base, int disp) + : _base(base), + _index_or_disp(disp) { + } + + Address(Register base, ByteSize disp) : + Address(base, in_bytes(disp)) {} + + // accessors + Register base() const { return _base; } + Register index() const { return _index_or_disp.as_register(); } + int disp() const { return _index_or_disp.as_constant(); } + + bool has_index() const { return _index_or_disp.is_register(); } + bool has_disp() const { return _index_or_disp.is_constant(); } + + bool uses(Register reg) const { return base() == reg || (has_index() && index() == reg); } // true if reg is the base or the index register + + const relocInfo::relocType rtype() const { return _rspec.type(); } // const-qualified, like AddressLiteral::rtype() + const RelocationHolder& rspec() const { return _rspec; } // likewise usable on a const Address + + RelocationHolder rspec(int offset) const { + return offset == 0 ? _rspec : _rspec.plus(offset); + } + + inline bool is_simm13(int offset = 0); // check disp+offset for overflow + + Address plus_disp(int plusdisp) const { // bump disp by a small amount + assert(_index_or_disp.is_constant(), "must have a displacement"); + Address a(base(), disp() + plusdisp); + return a; + } + bool is_same_address(Address a) const { + // disregard _rspec + return base() == a.base() && (has_index() ? index() == a.index() : disp() == a.disp()); + } + + Address after_save() const { + Address a = (*this); + a._base = a._base->after_save(); + return a; + } + + Address after_restore() const { + Address a = (*this); + a._base = a._base->after_restore(); + return a; + } + + // Convert the raw encoding form into the form expected by the + // constructor for Address. 
+ static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); + + friend class Assembler; +}; + + +class AddressLiteral { + private: + address _address; + RelocationHolder _rspec; + + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::none: + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + + protected: + // creation + AddressLiteral() : _address(NULL), _rspec(NULL) {} + + public: + AddressLiteral(address addr, RelocationHolder const& rspec) + : _address(addr), + _rspec(rspec) {} + + // Some constructors to avoid casting at the call site. + AddressLiteral(jobject obj, RelocationHolder const& rspec) + : _address((address) obj), + _rspec(rspec) {} + + AddressLiteral(intptr_t value, RelocationHolder const& rspec) + : _address((address) value), + _rspec(rspec) {} + + AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + // Some constructors to avoid casting at the call site. 
+ AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + // 32-bit complains about a multiple declaration for int*. 
+ AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(Metadata* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(Metadata** addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + intptr_t value() const { return (intptr_t) _address; } + int low10() const; + + const relocInfo::relocType rtype() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + RelocationHolder rspec(int offset) const { + return offset == 0 ? _rspec : _rspec.plus(offset); + } +}; + +// Convenience classes +class ExternalAddress: public AddressLiteral { + private: + static relocInfo::relocType reloc_for_target(address target) { + // Sometimes ExternalAddress is used for values which aren't + // exactly addresses, like the card table base. + // external_word_type can't be used for values in the first page + // so just skip the reloc in that case. + return external_word_Relocation::can_be_relocated(target) ? 
relocInfo::external_word_type : relocInfo::none; + } + + public: + ExternalAddress(address target) : AddressLiteral(target, reloc_for_target( target)) {} + ExternalAddress(Metadata** target) : AddressLiteral(target, reloc_for_target((address) target)) {} +}; + +inline Address RegisterImpl::address_in_saved_window() const { + return (Address(SP, (sp_offset_in_saved_window() * wordSize) + STACK_BIAS)); +} + + + +// Argument is an abstraction used to represent an outgoing +// actual argument or an incoming formal parameter, whether +// it resides in memory or in a register, in a manner consistent +// with the SPARC Application Binary Interface, or ABI. This is +// often referred to as the native or C calling convention. + +class Argument { + private: + int _number; + bool _is_in; + + public: + enum { + n_register_parameters = 6, // only 6 registers may contain integer parameters + n_float_register_parameters = 16 // Can have up to 16 floating registers + }; + + // creation + Argument(int number, bool is_in) : _number(number), _is_in(is_in) {} + + int number() const { return _number; } + bool is_in() const { return _is_in; } + bool is_out() const { return !is_in(); } + + Argument successor() const { return Argument(number() + 1, is_in()); } + Argument as_in() const { return Argument(number(), true ); } + Argument as_out() const { return Argument(number(), false); } + + // locating register-based arguments: + bool is_register() const { return _number < n_register_parameters; } + + // locating Floating Point register-based arguments: + bool is_float_register() const { return _number < n_float_register_parameters; } + + FloatRegister as_float_register() const { + assert(is_float_register(), "must be a register argument"); + return as_FloatRegister(( number() *2 ) + 1); + } + FloatRegister as_double_register() const { + assert(is_float_register(), "must be a register argument"); + return as_FloatRegister(( number() *2 )); + } + + Register as_register() const { + 
assert(is_register(), "must be a register argument"); + return is_in() ? as_iRegister(number()) : as_oRegister(number()); + } + + // locating memory-based arguments + Address as_address() const { + assert(!is_register(), "must be a memory argument"); + return address_in_frame(); + } + + // When applied to a register-based argument, give the corresponding address + // into the 6-word area "into which callee may store register arguments" + // (This is a different place than the corresponding register-save area location.) + Address address_in_frame() const; + + // debugging + const char* name() const; + + friend class Assembler; +}; + + +class RegistersForDebugging : public StackObj { + private: + static const RegistersForDebugging& _dummy; // not ODR-used so not defined + + public: + intptr_t i[8], l[8], o[8], g[8]; + float f[32]; + double d[32]; + + void print(outputStream* s); + + static int i_offset(int j) { return offset_of(RegistersForDebugging, i) + j * sizeof(_dummy.i[0]); } + static int l_offset(int j) { return offset_of(RegistersForDebugging, l) + j * sizeof(_dummy.l[0]); } + static int o_offset(int j) { return offset_of(RegistersForDebugging, o) + j * sizeof(_dummy.o[0]); } + static int g_offset(int j) { return offset_of(RegistersForDebugging, g) + j * sizeof(_dummy.g[0]); } + static int f_offset(int j) { return offset_of(RegistersForDebugging, f) + j * sizeof(_dummy.f[0]); } + static int d_offset(int j) { return offset_of(RegistersForDebugging, d) + (j / 2) * sizeof(_dummy.d[0]); } + + // gen asm code to save regs + static void save_registers(MacroAssembler* a); + + // restore global registers in case C code disturbed them + static void restore_registers(MacroAssembler* a, Register r); +}; + + +// MacroAssembler extends Assembler by a few frequently used macros. +// +// Most of the standard SPARC synthetic ops are defined here. +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. 
+ +#define JMP2(r1, r2) jmp(r1, r2, __FILE__, __LINE__) +#define JMP(r1, off) jmp(r1, off, __FILE__, __LINE__) +#define JUMP(a, temp, off) jump(a, temp, off, __FILE__, __LINE__) +#define JUMPL(a, temp, d, off) jumpl(a, temp, d, off, __FILE__, __LINE__) + + +class MacroAssembler : public Assembler { + // code patchers need various routines like inv_wdisp() + friend class NativeInstruction; + friend class NativeGeneralJump; + friend class Relocation; + friend class Label; + + protected: + static int patched_branch(int dest_pos, int inst, int inst_pos); + static int branch_destination(int inst, int pos); + + // Support for VM calls + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + virtual void call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments); + + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is set up correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + // + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // A non-volatile java_thread_cache register should be specified so + // that the G2_thread value can be preserved across the call. + // (If java_thread_cache is noreg, then a slow get_thread call + // will re-initialize the G2_thread.) call_VM_base returns the register that contains the + // thread. + // + // If no last_java_sp is specified (noreg) then SP will be used instead.
+ + virtual void call_VM_base( + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread_cache, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after call + bool check_exception=true // flag which indicates if exception should be checked + ); + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // This routine should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register scratch_reg); + virtual void check_and_handle_earlyret(Register scratch_reg); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + // + // FIXME: Currently not done for SPARC + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + static bool uses_implicit_null_check(void* address); + + // support for delayed instructions + MacroAssembler* delayed() { Assembler::delayed(); return this; } + + // branches that use right instruction for v8 vs.
v9 + inline void br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void br( Condition c, bool a, Predict p, Label& L ); + + inline void fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void fb( Condition c, bool a, Predict p, Label& L ); + + // compares register with zero (32 bit) and branches (V9 and V8 instructions) + void cmp_zero_and_br( Condition c, Register s1, Label& L, bool a = false, Predict p = pn ); + // Compares a pointer register with zero and branches on (not)null. + // Does a test & branch on 32-bit systems and a register-branch on 64-bit. + void br_null ( Register s1, bool a, Predict p, Label& L ); + void br_notnull( Register s1, bool a, Predict p, Label& L ); + + // + // Compare registers and branch with nop in delay slot or cbcond without delay slot. + // + // ATTENTION: use these instructions with caution because cbcond instruction + // has very short distance: 512 instructions (2Kbyte). + + // Compare integer (32 bit) values (icc only). + void cmp_and_br_short(Register s1, Register s2, Condition c, Predict p, Label& L); + void cmp_and_br_short(Register s1, int simm13a, Condition c, Predict p, Label& L); + // Platform-dependent version for pointer compare (icc on !LP64 and xcc on LP64). + void cmp_and_brx_short(Register s1, Register s2, Condition c, Predict p, Label& L); + void cmp_and_brx_short(Register s1, int simm13a, Condition c, Predict p, Label& L); + + // Short branch version for comparing a pointer with zero. + void br_null_short ( Register s1, Predict p, Label& L ); + void br_notnull_short( Register s1, Predict p, Label& L ); + + // unconditional short branch + void ba_short(Label& L); + + // Branch on icc.z (true or not).
+ void br_icc_zero(bool iszero, Predict p, Label &L); + + inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void bp( Condition c, bool a, CC cc, Predict p, Label& L ); + + // Branch that tests xcc in LP64 and icc in !LP64 + inline void brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void brx( Condition c, bool a, Predict p, Label& L ); + + // unconditional branch + inline void ba( Label& L ); + + // Branch that tests fp condition codes + inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L ); + + // Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual) + inline void cmp( Register s1, Register s2 ); + inline void cmp( Register s1, int simm13a ); + + inline void jmp( Register s1, Register s2 ); + inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() ); + + // Check if the call target is out of wdisp30 range (relative to the code cache) + static inline bool is_far_target(address d); + inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type ); + inline void call( address d, RelocationHolder const& rspec); + + inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type ); + inline void call( Label& L, RelocationHolder const& rspec); + + inline void callr( Register s1, Register s2 ); + inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() ); + + inline void tst( Register s ); + + inline void ret( bool trace = false ); + inline void retl( bool trace = false ); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
+ void pd_patch_instruction(address branch, address target, const char* file, int line); + + // sethi Macro handles optimizations and relocations +private: + void internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable); +public: + void sethi(const AddressLiteral& addrlit, Register d); + void patchable_sethi(const AddressLiteral& addrlit, Register d); + + // compute the number of instructions for a sethi/set + static int insts_for_sethi( address a, bool worst_case = false ); + static int worst_case_insts_for_set(); + + // set may be either setsw or setuw (high 32 bits may be zero or sign) +private: + void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable); + static int insts_for_internal_set(intptr_t value); +public: + void set(const AddressLiteral& addrlit, Register d); + void set(intptr_t value, Register d); + void set(address addr, Register d, RelocationHolder const& rspec); + static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); } + + void patchable_set(const AddressLiteral& addrlit, Register d); + void patchable_set(intptr_t value, Register d); + void set64(jlong value, Register d, Register tmp); + static int insts_for_set64(jlong value); + + // sign-extend 32 to 64 + inline void signx( Register s, Register d ); + inline void signx( Register d ); + + inline void not1( Register s, Register d ); + inline void not1( Register d ); + + inline void neg( Register s, Register d ); + inline void neg( Register d ); + + inline void cas( Register s1, Register s2, Register d); + inline void casx( Register s1, Register s2, Register d); + // Functions for isolating 64 bit atomic swaps for LP64 + // cas_ptr will perform cas for 32 bit VM's and casx for 64 bit VM's + inline void cas_ptr( Register s1, Register s2, Register d); + + // Resolve a jobject or jweak + void resolve_jobject(Register value, Register tmp); + + // Functions for isolating 64 bit shifts for LP64 + inline void sll_ptr( Register s1, 
Register s2, Register d ); + inline void sll_ptr( Register s1, int imm6a, Register d ); + inline void sll_ptr( Register s1, RegisterOrConstant s2, Register d ); + inline void srl_ptr( Register s1, Register s2, Register d ); + inline void srl_ptr( Register s1, int imm6a, Register d ); + + // little-endian + inline void casl( Register s1, Register s2, Register d); + inline void casxl( Register s1, Register s2, Register d); + + inline void inc( Register d, int const13 = 1 ); + inline void inccc( Register d, int const13 = 1 ); + + inline void dec( Register d, int const13 = 1 ); + inline void deccc( Register d, int const13 = 1 ); + + using Assembler::add; + inline void add(Register s1, int simm13a, Register d, relocInfo::relocType rtype); + inline void add(Register s1, int simm13a, Register d, RelocationHolder const& rspec); + inline void add(Register s1, RegisterOrConstant s2, Register d, int offset = 0); + inline void add(const Address& a, Register d, int offset = 0); + + using Assembler::andn; + inline void andn( Register s1, RegisterOrConstant s2, Register d); + + inline void btst( Register s1, Register s2 ); + inline void btst( int simm13a, Register s ); + + inline void bset( Register s1, Register s2 ); + inline void bset( int simm13a, Register s ); + + inline void bclr( Register s1, Register s2 ); + inline void bclr( int simm13a, Register s ); + + inline void btog( Register s1, Register s2 ); + inline void btog( int simm13a, Register s ); + + inline void clr( Register d ); + + inline void clrb( Register s1, Register s2); + inline void clrh( Register s1, Register s2); + inline void clr( Register s1, Register s2); + inline void clrx( Register s1, Register s2); + + inline void clrb( Register s1, int simm13a); + inline void clrh( Register s1, int simm13a); + inline void clr( Register s1, int simm13a); + inline void clrx( Register s1, int simm13a); + + // copy & clear upper word + inline void clruw( Register s, Register d ); + // clear upper word + inline void clruwu( 
Register d ); + + using Assembler::ldsb; + using Assembler::ldsh; + using Assembler::ldsw; + using Assembler::ldub; + using Assembler::lduh; + using Assembler::lduw; + using Assembler::ldx; + using Assembler::ldd; + + inline void ld(Register s1, ByteSize simm13a, Register d); + + inline void ld(Register s1, Register s2, Register d); + inline void ld(Register s1, int simm13a, Register d); + + inline void ldsb(const Address& a, Register d, int offset = 0); + inline void ldsh(const Address& a, Register d, int offset = 0); + inline void ldsw(const Address& a, Register d, int offset = 0); + inline void ldub(const Address& a, Register d, int offset = 0); + inline void lduh(const Address& a, Register d, int offset = 0); + inline void lduw(const Address& a, Register d, int offset = 0); + inline void ldx( const Address& a, Register d, int offset = 0); + inline void ld( const Address& a, Register d, int offset = 0); + inline void ldd( const Address& a, Register d, int offset = 0); + + inline void ldub(Register s1, RegisterOrConstant s2, Register d ); + inline void ldsb(Register s1, RegisterOrConstant s2, Register d ); + inline void lduh(Register s1, RegisterOrConstant s2, Register d ); + inline void ldsh(Register s1, RegisterOrConstant s2, Register d ); + inline void lduw(Register s1, RegisterOrConstant s2, Register d ); + inline void ldsw(Register s1, RegisterOrConstant s2, Register d ); + inline void ldx( Register s1, RegisterOrConstant s2, Register d ); + inline void ld( Register s1, RegisterOrConstant s2, Register d ); + inline void ldd( Register s1, RegisterOrConstant s2, Register d ); + + using Assembler::ldf; + inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d); + inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0); + + // little-endian + inline void lduwl(Register s1, Register s2, Register d); + inline void ldswl(Register s1, Register s2, Register d); + inline void ldxl( Register 
s1, Register s2, Register d); + inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d); + + // membar pseudo instruction. takes into account target memory model. + inline void membar( Assembler::Membar_mask_bits const7a ); + + // returns if membar generates anything. + inline bool membar_has_effect( Assembler::Membar_mask_bits const7a ); + + // mov pseudo instructions + inline void mov( Register s, Register d); + + inline void mov_or_nop( Register s, Register d); + + inline void mov( int simm13a, Register d); + + using Assembler::prefetch; + inline void prefetch(const Address& a, PrefetchFcn F, int offset = 0); + + using Assembler::stb; + using Assembler::sth; + using Assembler::stw; + using Assembler::stx; + using Assembler::std; + + inline void st(Register d, Register s1, ByteSize simm13a); + + inline void st(Register d, Register s1, Register s2); + inline void st(Register d, Register s1, int simm13a); + + inline void stb(Register d, const Address& a, int offset = 0 ); + inline void sth(Register d, const Address& a, int offset = 0 ); + inline void stw(Register d, const Address& a, int offset = 0 ); + inline void stx(Register d, const Address& a, int offset = 0 ); + inline void st( Register d, const Address& a, int offset = 0 ); + inline void std(Register d, const Address& a, int offset = 0 ); + + inline void stb(Register d, Register s1, RegisterOrConstant s2 ); + inline void sth(Register d, Register s1, RegisterOrConstant s2 ); + inline void stw(Register d, Register s1, RegisterOrConstant s2 ); + inline void stx(Register d, Register s1, RegisterOrConstant s2 ); + inline void std(Register d, Register s1, RegisterOrConstant s2 ); + inline void st( Register d, Register s1, RegisterOrConstant s2 ); + + using Assembler::stf; + inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2); + inline void stf(FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset = 0); + + // Note: 
offset is added to s2. + using Assembler::sub; + inline void sub(Register s1, RegisterOrConstant s2, Register d, int offset = 0); + + using Assembler::swap; + inline void swap(const Address& a, Register d, int offset = 0); + + // address pseudos: make these names unlike instruction names to avoid confusion + inline intptr_t load_pc_address( Register reg, int bytes_to_skip ); + inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0); + inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0); + inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0); + inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0); + inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0); + inline void jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset = 0); + inline void jump_to(const AddressLiteral& addrlit, Register temp, int offset = 0); + inline void jump_indirect_to(Address& a, Register temp, int ld_offset = 0, int jmp_offset = 0); + + // ring buffer traceable jumps + + void jmp2( Register r1, Register r2, const char* file, int line ); + void jmp ( Register r1, int offset, const char* file, int line ); + + void jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line); + void jump (const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line); + + + // argument pseudos: + + inline void load_argument( Argument& a, Register d ); + inline void store_argument( Register s, Argument& a ); + inline void store_ptr_argument( Register s, Argument& a ); + inline void store_float_argument( FloatRegister s, Argument& a ); + inline void store_double_argument( FloatRegister s, Argument& a ); + inline void store_long_argument( Register s, Argument& a ); + + // handy macros: + + inline void round_to( Register r, int 
modulus ); + + // -------------------------------------------------- + + // Functions for isolating 64 bit loads for LP64 + // ld_ptr will perform ld for 32 bit VM's and ldx for 64 bit VM's + // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's + inline void ld_ptr(Register s1, Register s2, Register d); + inline void ld_ptr(Register s1, int simm13a, Register d); + inline void ld_ptr(Register s1, RegisterOrConstant s2, Register d); + inline void ld_ptr(const Address& a, Register d, int offset = 0); + inline void st_ptr(Register d, Register s1, Register s2); + inline void st_ptr(Register d, Register s1, int simm13a); + inline void st_ptr(Register d, Register s1, RegisterOrConstant s2); + inline void st_ptr(Register d, const Address& a, int offset = 0); + + inline void ld_ptr(Register s1, ByteSize simm13a, Register d); + inline void st_ptr(Register d, Register s1, ByteSize simm13a); + + // ld_long will perform ldd for 32 bit VM's and ldx for 64 bit VM's + // st_long will perform std for 32 bit VM's and stx for 64 bit VM's + inline void ld_long(Register s1, Register s2, Register d); + inline void ld_long(Register s1, int simm13a, Register d); + inline void ld_long(Register s1, RegisterOrConstant s2, Register d); + inline void ld_long(const Address& a, Register d, int offset = 0); + inline void st_long(Register d, Register s1, Register s2); + inline void st_long(Register d, Register s1, int simm13a); + inline void st_long(Register d, Register s1, RegisterOrConstant s2); + inline void st_long(Register d, const Address& a, int offset = 0); + + // Helpers for address formation. + // - They emit only a move if s2 is a constant zero. + // - If dest is a constant and either s1 or s2 is a register, the temp argument is required and becomes the result. + // - If dest is a register and either s1 or s2 is a non-simm13 constant, the temp argument is required and used to materialize the constant. 
+ RegisterOrConstant regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); + RegisterOrConstant regcon_inc_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); + RegisterOrConstant regcon_sll_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); + + RegisterOrConstant ensure_simm13_or_reg(RegisterOrConstant src, Register temp) { + if (is_simm13(src.constant_or_zero())) + return src; // register or short constant + guarantee(temp != noreg, "constant offset overflow"); + set(src.as_constant(), temp); + return temp; + } + + // -------------------------------------------------- + + public: + // traps as per trap.h (SPARC ABI?) + + void breakpoint_trap(); + void breakpoint_trap(Condition c, CC cc); + + void safepoint_poll(Label& slow_path, bool a, Register thread_reg, Register temp_reg); + + // Stack frame creation/removal + void enter(); + void leave(); + + // Manipulation of C++ bools + // These are idioms to flag the need for care with accessing bools but on + // this platform we assume byte size + + inline void stbool(Register d, const Address& a); + inline void ldbool(const Address& a, Register d); + inline void movbool( bool boolconst, Register d); + + void resolve_oop_handle(Register result, Register tmp); + void load_mirror(Register mirror, Register method, Register tmp); + + // klass oop manipulations if compressed + void load_klass(Register src_oop, Register klass); + void store_klass(Register klass, Register dst_oop); + void store_klass_gap(Register s, Register dst_oop); + + // oop manipulations + void access_store_at(BasicType type, DecoratorSet decorators, + Register src, Address dst, Register tmp); + void access_load_at(BasicType type, DecoratorSet decorators, + Address src, Register dst, Register tmp); + + void load_heap_oop(const Address& s, Register d, + Register tmp = noreg, DecoratorSet decorators = 0); + void 
load_heap_oop(Register s1, Register s2, Register d, + Register tmp = noreg, DecoratorSet decorators = 0); + void load_heap_oop(Register s1, int simm13a, Register d, + Register tmp = noreg, DecoratorSet decorators = 0); + void load_heap_oop(Register s1, RegisterOrConstant s2, Register d, + Register tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Register d, Register s1, Register s2, + Register tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Register d, Register s1, int simm13a, + Register tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Register d, const Address& a, int offset = 0, + Register tmp = noreg, DecoratorSet decorators = 0); + + void encode_heap_oop(Register src, Register dst); + void encode_heap_oop(Register r) { + encode_heap_oop(r, r); + } + void decode_heap_oop(Register src, Register dst); + void decode_heap_oop(Register r) { + decode_heap_oop(r, r); + } + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register src, Register dst); + void decode_heap_oop_not_null(Register src, Register dst); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register src, Register dst); + void decode_klass_not_null(Register src, Register dst); + + // Support for managing the JavaThread pointer (i.e.; the reference to + // thread-local information). + void get_thread(); // load G2_thread + void verify_thread(); // verify G2_thread contents + void save_thread (const Register threache); // save to cache + void restore_thread(const Register thread_cache); // restore from cache + + // Support for last Java frame (but use call_VM instead where possible) + void set_last_Java_frame(Register last_java_sp, Register last_Java_pc); + void reset_last_Java_frame(void); + + // Call into the VM. + // Passes the thread pointer (in O0) as a prepended argument. 
+ // Makes sure oop return values are visible to the GC. + void call_VM(Register oop_result, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + + // these overloadings are not presently used on SPARC: + void call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + + void call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments = 0); + void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1); + void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2); + void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3); + + void get_vm_result (Register oop_result); + void get_vm_result_2(Register metadata_result); + + // vm result is currently getting hijacked for oop preservation + void set_vm_result(Register oop_result); + + // Emit the CompiledIC call idiom + void ic_call(address entry, bool emit_delay = true, jint method_index = 0); + + // if call_VM_base was called with check_exceptions=false, then call + // check_and_forward_exception to handle
exceptions when it is safe + void check_and_forward_exception(Register scratch_reg); + + // Returns the byte size of the instructions generated by decode_klass_not_null(). + static int instr_size_for_decode_klass_not_null(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + // Debugging + void _verify_oop(Register reg, const char * msg, const char * file, int line); + void _verify_oop_addr(Address addr, const char * msg, const char * file, int line); + + // TODO: verify_method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + +#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__) +#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr ", __FILE__, __LINE__) +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + void stop(const char* msg); // prints msg, dumps registers and stops execution + void warn(const char* msg); // prints msg, but don't stop + void untested(const char* what = ""); + void unimplemented(const char* what = ""); + void should_not_reach_here() { stop("should not reach here"); } + + // oops in code + AddressLiteral allocate_oop_address(jobject obj); // allocate_index + AddressLiteral constant_oop_address(jobject obj); // find_index + inline void set_oop (jobject obj, Register d); // uses allocate_oop_address + inline void set_oop_constant (jobject obj, Register d); // uses constant_oop_address + inline void set_oop (const AddressLiteral& obj_addr, Register d); // same as load_address + + // metadata in code that we have to keep track of + AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index + AddressLiteral 
constant_metadata_address(Metadata* obj); // find_index + inline void set_metadata (Metadata* obj, Register d); // uses allocate_metadata_address + inline void set_metadata_constant (Metadata* obj, Register d); // uses constant_metadata_address + inline void set_metadata (const AddressLiteral& obj_addr, Register d); // same as load_address + + void set_narrow_oop( jobject obj, Register d ); + void set_narrow_klass( Klass* k, Register d ); + + // nop padding + void align(int modulus); + + // declare a safepoint + void safepoint(); + + // factor out part of stop into subroutine to save space + void stop_subroutine(); + // factor out part of verify_oop into subroutine to save space + void verify_oop_subroutine(); + + // side-door communication with signalHandler in os_solaris.cpp + static address _verify_oop_implicit_branch[3]; + + int total_frame_size_in_bytes(int extraWords); + + // used when extraWords known statically + void save_frame(int extraWords = 0); + void save_frame_c1(int size_in_bytes); + // make a frame, and simultaneously pass up one or two register values + // into the new register window + void save_frame_and_mov(int extraWords, Register s1, Register d1, Register s2 = Register(), Register d2 = Register()); + + // give no.
(outgoing) params, calc # of words will need on frame + void calc_mem_param_words(Register Rparam_words, Register Rresult); + + // used to calculate frame size dynamically + // result is in bytes and must be negated for save inst + void calc_frame_size(Register extraWords, Register resultReg); + + // calc the frame size and also save + void calc_frame_size_and_save(Register extraWords, Register resultReg); + + static void debug(char* msg, RegistersForDebugging* outWindow); + + // implementations of bytecodes used by both interpreter and compiler + + void lcmp( Register Ra_hi, Register Ra_low, + Register Rb_hi, Register Rb_low, + Register Rresult); + + void lneg( Register Rhi, Register Rlow ); + + void lshl( Register Rin_high, Register Rin_low, Register Rcount, + Register Rout_high, Register Rout_low, Register Rtemp ); + + void lshr( Register Rin_high, Register Rin_low, Register Rcount, + Register Rout_high, Register Rout_low, Register Rtemp ); + + void lushr( Register Rin_high, Register Rin_low, Register Rcount, + Register Rout_high, Register Rout_low, Register Rtemp ); + + void lcmp( Register Ra, Register Rb, Register Rresult); + + // Load and store values by size and signed-ness + void load_sized_value( Address src, Register dst, size_t size_in_bytes, bool is_signed); + void store_sized_value(Register src, Address dst, size_t size_in_bytes); + + void float_cmp( bool is_float, int unordered_result, + FloatRegister Fa, FloatRegister Fb, + Register Rresult); + + void save_all_globals_into_locals(); + void restore_globals_from_locals(); + + // These set the icc condition code to equal if the lock succeeded + // and notEqual if it failed and requires a slow case + void compiler_lock_object(Register Roop, Register Rmark, Register Rbox, + Register Rscratch, + BiasedLockingCounters* counters = NULL, + bool try_bias = UseBiasedLocking); + void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, + Register Rscratch, + bool try_bias = UseBiasedLocking); + + // Biased
locking support + // Upon entry, lock_reg must point to the lock record on the stack, + // obj_reg must contain the target object, and mark_reg must contain + // the target object's header. + // Destroys mark_reg if an attempt is made to bias an anonymously + // biased lock. In this case a failure will go either to the slow + // case or fall through with the notEqual condition code set with + // the expectation that the slow case in the runtime will be called. + // In the fall-through case where the CAS-based lock is done, + // mark_reg is not destroyed. + void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + // Upon entry, the base register of mark_addr must contain the oop. + // Destroys temp_reg. + + // If allow_delay_slot_filling is set to true, the next instruction + // emitted after this one will go in an annulled delay slot if the + // biased locking exit case failed. + void biased_locking_exit(Address mark_addr, Register temp_reg, Label& done, bool allow_delay_slot_filling = false); + + // allocation (fast paths; slow_case is the continuation when fast allocation fails) + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void zero_memory(Register base, Register index); + void
incr_allocated_bytes(RegisterOrConstant size_in_bytes, + Register t1, Register t2); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register temp_reg, Register temp2_reg, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1 (the default), the value is loaded from super_klass. + // No registers are killed, except temp_reg and temp2_reg. + // If super_check_offset is not -1, temp2_reg is not used and can be noreg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg can be noreg, if no temps are available. + // It can also be sub_klass or super_klass, meaning it's OK to kill that one. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register temp4_reg, + Label* L_success, + Label* L_failure); + + // Simplified, combined version, good for typical uses. + // Falls through on failure.
+ void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label& L_success); + + // method handles (JSR 292) + // offset relative to Gargs of argument at tos[arg_slot]. + // (arg_slot == 0 means the last argument, not the first). + RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, + Register temp_reg, + int extra_slot_offset = 0); + // Address built from Gargs and argument_offset (same arguments; temp_reg defaults to noreg). + Address argument_address(RegisterOrConstant arg_slot, + Register temp_reg = noreg, + int extra_slot_offset = 0); + + // Stack overflow checking + + // Note: this clobbers G3_scratch + void bang_stack_with_offset(int offset); + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Clobbers tsp and scratch registers. + void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch); + + // Check for reserved stack access in method being exited (for JIT) + void reserved_stack_check(); + + void verify_tlab(); + + Condition negate_condition(Condition cond); + + // Helper functions for statistics gathering. + // Conditionally (non-atomically) increments passed counter address, preserving condition codes. + void cond_inc(Condition cond, address counter_addr, Register Rtemp1, Register Rtemp2); + // Unconditional increment.
+ void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2); + void inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2); + + // Use BIS for zeroing + void bis_zeroing(Register to, Register count, Register temp, Label& Ldone); + + // Update CRC-32[C] with a byte value according to constants in table + void update_byte_crc32(Register crc, Register val, Register table); + + // Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros + void reverse_bytes_32(Register src, Register dst, Register tmp); + void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2); + void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2); + + // CRC32 code for java.util.zip.CRC32::updateBytes0() intrinsic. + void kernel_crc32(Register crc, Register buf, Register len, Register table); + // Fold 128-bit data chunk + void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset); + void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo); + // Fold 8-bit data + void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp); + void fold_8bit_crc32(Register crc, Register table, Register tmp); + // CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer intrinsic. + void kernel_crc32c(Register crc, Register buf, Register len, Register table); + +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and its + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time.
+ */ +class SkipIfEqual : public StackObj { + private: + MacroAssembler* _masm; + Label _label; + + public: + // 'temp' is a temp register that this object can use (and trash) + SkipIfEqual(MacroAssembler*, Register temp, + const bool* flag_addr, Assembler::Condition condition); + ~SkipIfEqual(); +}; + +#endif // CPU_SPARC_MACROASSEMBLER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp b/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp --- a/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp 2023-04-16 11:42:11.068078777 +0000 @@ -0,0 +1,703 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef CPU_SPARC_MACROASSEMBLER_SPARC_INLINE_HPP +#define CPU_SPARC_MACROASSEMBLER_SPARC_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +inline bool Address::is_simm13(int offset) { return Assembler::is_simm13(disp() + offset); } + + +inline int AddressLiteral::low10() const { + return Assembler::low10(value()); +} + + +inline void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { /* file and line are not used by this implementation */ + jint& stub_inst = *(jint*) branch; + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +// Use the right loads/stores for the platform (pointer-sized accesses are emitted as ldx/stx here) +inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) { + Assembler::ldx(s1, s2, d); +} + +inline void MacroAssembler::ld_ptr( Register s1, int simm13a, Register d ) { + Assembler::ldx(s1, simm13a, d); +} + +inline void MacroAssembler::ld_ptr( Register s1, ByteSize simm13a, Register d ) { + ld_ptr(s1, in_bytes(simm13a), d); +} + +inline void MacroAssembler::ld_ptr( Register s1, RegisterOrConstant s2, Register d ) { + ldx(s1, s2, d); +} + +inline void MacroAssembler::ld_ptr(const Address& a, Register d, int offset) { + ldx(a, d, offset); +} + +inline void MacroAssembler::st_ptr( Register d, Register s1, Register s2 ) { + Assembler::stx(d, s1, s2); +} + +inline void MacroAssembler::st_ptr( Register d, Register s1, int simm13a ) { + Assembler::stx(d, s1, simm13a); +} + +inline void MacroAssembler::st_ptr( Register d, Register s1, ByteSize simm13a ) { + st_ptr(d, s1, in_bytes(simm13a)); +} + +inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterOrConstant s2 ) { + stx(d, s1, s2); +} + +inline void MacroAssembler::st_ptr(Register d, const Address& a, int offset) { + stx(d, a, offset); +} + +// Use the right loads/stores for the platform (long accesses are emitted as ldx/stx as well) +inline void MacroAssembler::ld_long( Register s1, Register s2, Register d ) { + Assembler::ldx(s1, s2, d); +} +
+inline void MacroAssembler::ld_long( Register s1, int simm13a, Register d ) { + Assembler::ldx(s1, simm13a, d); +} + +inline void MacroAssembler::ld_long( Register s1, RegisterOrConstant s2, Register d ) { + ldx(s1, s2, d); +} + +inline void MacroAssembler::ld_long(const Address& a, Register d, int offset) { + ldx(a, d, offset); +} + +inline void MacroAssembler::st_long( Register d, Register s1, Register s2 ) { + Assembler::stx(d, s1, s2); +} + +inline void MacroAssembler::st_long( Register d, Register s1, int simm13a ) { + Assembler::stx(d, s1, simm13a); +} + +inline void MacroAssembler::st_long( Register d, Register s1, RegisterOrConstant s2 ) { + stx(d, s1, s2); +} + +inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) { + stx(d, a, offset); +} + +inline void MacroAssembler::stbool(Register d, const Address& a) { stb(d, a); } +inline void MacroAssembler::ldbool(const Address& a, Register d) { ldub(a, d); } +inline void MacroAssembler::movbool( bool boolconst, Register d) { mov( (int) boolconst, d); } + + +inline void MacroAssembler::signx( Register s, Register d ) { sra( s, G0, d); } +inline void MacroAssembler::signx( Register d ) { sra( d, G0, d); } + +inline void MacroAssembler::not1( Register s, Register d ) { xnor( s, G0, d ); } +inline void MacroAssembler::not1( Register d ) { xnor( d, G0, d ); } + +inline void MacroAssembler::neg( Register s, Register d ) { sub( G0, s, d ); } +inline void MacroAssembler::neg( Register d ) { sub( G0, d, d ); } + +inline void MacroAssembler::cas( Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY); } +inline void MacroAssembler::casx( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY); } + +// Functions for isolating 64 bit atomic swaps for LP64 +// cas_ptr is documented as cas for 32 bit VM's and casx for 64 bit VM's; the body below always emits casx +inline void MacroAssembler::cas_ptr( Register s1, Register s2, Register d) { + casx( s1, s2, d ); +} + +// Functions for isolating 64 bit shifts
for LP64 + +inline void MacroAssembler::sll_ptr( Register s1, Register s2, Register d ) { + Assembler::sllx(s1, s2, d); +} + +inline void MacroAssembler::sll_ptr( Register s1, int imm6a, Register d ) { + Assembler::sllx(s1, imm6a, d); +} + +inline void MacroAssembler::srl_ptr( Register s1, Register s2, Register d ) { + Assembler::srlx(s1, s2, d); +} + +inline void MacroAssembler::srl_ptr( Register s1, int imm6a, Register d ) { + Assembler::srlx(s1, imm6a, d); +} + +inline void MacroAssembler::sll_ptr( Register s1, RegisterOrConstant s2, Register d ) { + if (s2.is_register()) sll_ptr(s1, s2.as_register(), d); + else sll_ptr(s1, s2.as_constant(), d); +} + +inline void MacroAssembler::casl( Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY_LITTLE); } +inline void MacroAssembler::casxl( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY_LITTLE); } + +inline void MacroAssembler::inc( Register d, int const13 ) { add( d, const13, d); } +inline void MacroAssembler::inccc( Register d, int const13 ) { addcc( d, const13, d); } + +inline void MacroAssembler::dec( Register d, int const13 ) { sub( d, const13, d); } +inline void MacroAssembler::deccc( Register d, int const13 ) { subcc( d, const13, d); } + +// Use the right branch for the platform (br is emitted as bp on icc) + +inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { + Assembler::bp(c, a, icc, p, d, rt); +} + +inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) { + // See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
+ avoid_pipeline_stall(); + br(c, a, p, target(L)); +} + + +// Branch that tests either xcc or icc depending on the +// architecture compiled (LP64 or not); the code below always uses xcc +inline void MacroAssembler::brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { + Assembler::bp(c, a, xcc, p, d, rt); +} + +inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) { + avoid_pipeline_stall(); + brx(c, a, p, target(L)); +} + +inline void MacroAssembler::ba( Label& L ) { + br(always, false, pt, L); +} + +// Warning: V9 only functions +inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { + Assembler::bp(c, a, cc, p, d, rt); +} + +inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { + Assembler::bp(c, a, cc, p, L); +} + +inline void MacroAssembler::fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { + fbp(c, a, fcc0, p, d, rt); +} + +inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) { + avoid_pipeline_stall(); + fb(c, a, p, target(L)); +} + +inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { + Assembler::fbp(c, a, cc, p, d, rt); +} + +inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { + Assembler::fbp(c, a, cc, p, L); +} + +inline void MacroAssembler::jmp( Register s1, Register s2 ) { jmpl( s1, s2, G0 ); } +inline void MacroAssembler::jmp( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, G0, rspec); } + +inline bool MacroAssembler::is_far_target(address d) { + if (ForceUnreachable) { + // References outside the code cache should be treated as far + return d < CodeCache::low_bound() || d > CodeCache::high_bound(); + } + return !is_in_wdisp30_range(d, CodeCache::low_bound()) || !is_in_wdisp30_range(d, CodeCache::high_bound()); +} + +// Call with a check to see if we need to deal with the added +//
expense of relocation and if we overflow the displacement +// of the quick call instruction. +inline void MacroAssembler::call( address d, relocInfo::relocType rt ) { + MacroAssembler::call(d, Relocation::spec_simple(rt)); +} + +inline void MacroAssembler::call( address d, RelocationHolder const& rspec ) { + intptr_t disp; // declared but never used in this function + // NULL is ok because it will be relocated later. + // Must change NULL to a reachable address in order to + // pass asserts here and in wdisp. + if ( d == NULL ) + d = pc(); + + // Is this address within range of the call instruction? + // If not, use the expensive instruction sequence + if (is_far_target(d)) { + relocate(rspec); + AddressLiteral dest(d); + jumpl_to(dest, O7, O7); + } else { + Assembler::call(d, rspec); + } +} + +inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) { + avoid_pipeline_stall(); + MacroAssembler::call(target(L), rt); +} + + +inline void MacroAssembler::callr( Register s1, Register s2 ) { jmpl( s1, s2, O7 ); } +inline void MacroAssembler::callr( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, O7, rspec); } + +inline void MacroAssembler::tst( Register s ) { orcc( G0, s, G0 ); } + +inline void MacroAssembler::ret( bool trace ) { + if (trace) { + mov(I7, O7); // traceable register + JMP(O7, 2 * BytesPerInstWord); + } else { + jmpl( I7, 2 * BytesPerInstWord, G0 ); + } +} + +inline void MacroAssembler::retl( bool trace ) { + if (trace) { + JMP(O7, 2 * BytesPerInstWord); + } else { + jmpl( O7, 2 * BytesPerInstWord, G0 ); + } +} + + +inline void MacroAssembler::cmp( Register s1, Register s2 ) { subcc( s1, s2, G0 ); } +inline void MacroAssembler::cmp( Register s1, int simm13a ) { subcc( s1, simm13a, G0 ); } + +// Note: All MacroAssembler::set_foo functions are defined out-of-line. + + +// Loads the current PC of the following instruction as an immediate value in +// 2 instructions. All PCs in the CodeCache are within 2 Gig of each other.
+inline intptr_t MacroAssembler::load_pc_address( Register reg, int bytes_to_skip ) { + intptr_t thepc = (intptr_t)pc() + 2*BytesPerInstWord + bytes_to_skip; + Unimplemented(); /* reg is never referenced: the load itself is not implemented here */ + return thepc; +} + + +inline void MacroAssembler::load_contents(const AddressLiteral& addrlit, Register d, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } + ld(d, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } + ldub(d, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } + ld_ptr(d, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, temp); + } else { + sethi(addrlit, temp); + } + st(s, temp, addrlit.low10() + offset); +} + + +inline void MacroAssembler::store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, temp); + } else { + sethi(addrlit, temp); + } + st_ptr(s, temp, addrlit.low10() + offset); +} + + +// This code sequence is relocatable to any address, even on LP64. +inline void MacroAssembler::jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset) { + assert_not_delayed(); + // Force fixed length sethi because NativeJump and NativeFarCall don't handle + // variable length instruction streams.
+ patchable_sethi(addrlit, temp); + jmpl(temp, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::jump_to(const AddressLiteral& addrlit, Register temp, int offset) { + jumpl_to(addrlit, temp, G0, offset); +} + + +inline void MacroAssembler::jump_indirect_to(Address& a, Register temp, + int ld_offset, int jmp_offset) { + assert_not_delayed(); + //sethi(al); // sethi is the caller's responsibility for this one + ld_ptr(a, temp, ld_offset); + jmp(temp, jmp_offset); +} + + +inline void MacroAssembler::set_metadata(Metadata* obj, Register d) { + set_metadata(allocate_metadata_address(obj), d); +} + +inline void MacroAssembler::set_metadata_constant(Metadata* obj, Register d) { + set_metadata(constant_metadata_address(obj), d); +} + +inline void MacroAssembler::set_metadata(const AddressLiteral& obj_addr, Register d) { + assert(obj_addr.rspec().type() == relocInfo::metadata_type, "must be a metadata reloc"); + set(obj_addr, d); +} + +inline void MacroAssembler::set_oop(jobject obj, Register d) { + set_oop(allocate_oop_address(obj), d); +} + + +inline void MacroAssembler::set_oop_constant(jobject obj, Register d) { + set_oop(constant_oop_address(obj), d); +} + + +inline void MacroAssembler::set_oop(const AddressLiteral& obj_addr, Register d) { + assert(obj_addr.rspec().type() == relocInfo::oop_type, "must be an oop reloc"); + set(obj_addr, d); +} + + +inline void MacroAssembler::load_argument( Argument& a, Register d ) { + if (a.is_register()) + mov(a.as_register(), d); + else + ld (a.as_address(), d); // NOTE(review): ld emits ldsw (32 bit) in this file while store_argument uses st_ptr - confirm intended +} + +inline void MacroAssembler::store_argument( Register s, Argument& a ) { + if (a.is_register()) + mov(s, a.as_register()); + else + st_ptr (s, a.as_address()); // ABI says everything is right justified.
+} + +inline void MacroAssembler::store_ptr_argument( Register s, Argument& a ) { + if (a.is_register()) + mov(s, a.as_register()); + else + st_ptr (s, a.as_address()); +} + + +inline void MacroAssembler::store_float_argument( FloatRegister s, Argument& a ) { + if (a.is_float_register()) +// V9 ABI: F1, F3, F5 are used for passing instead of O0, O1, O2 + fmov(FloatRegisterImpl::S, s, a.as_float_register() ); + else + // Floats are stored in the high half of the stack entry + // The low half is undefined per the ABI. + stf(FloatRegisterImpl::S, s, a.as_address(), sizeof(jfloat)); +} + +inline void MacroAssembler::store_double_argument( FloatRegister s, Argument& a ) { + if (a.is_float_register()) +// V9 ABI: D0, D2, D4 are used for passing instead of O0, O1, O2 + fmov(FloatRegisterImpl::D, s, a.as_double_register() ); + else + stf(FloatRegisterImpl::D, s, a.as_address()); +} + +inline void MacroAssembler::store_long_argument( Register s, Argument& a ) { + if (a.is_register()) + mov(s, a.as_register()); + else + stx(s, a.as_address()); +} + +inline void MacroAssembler::round_to( Register r, int modulus ) { + assert_not_delayed(); + inc( r, modulus - 1 ); + and3( r, -modulus, r ); +} + +inline void MacroAssembler::add(Register s1, int simm13a, Register d, relocInfo::relocType rtype) { + relocate(rtype); + add(s1, simm13a, d); +} +inline void MacroAssembler::add(Register s1, int simm13a, Register d, RelocationHolder const& rspec) { + relocate(rspec); + add(s1, simm13a, d); +} + +// form effective addresses this way: +inline void MacroAssembler::add(const Address& a, Register d, int offset) { + if (a.has_index()) add(a.base(), a.index(), d); + else { add(a.base(), a.disp() + offset, d, a.rspec(offset)); offset = 0; } + if (offset != 0) add(d, offset, d); +} +inline void MacroAssembler::add(Register s1, RegisterOrConstant s2, Register d, int offset) { + if (s2.is_register()) add(s1, s2.as_register(), d); + else { add(s1, s2.as_constant() + offset, d); offset = 0; } + if
(offset != 0) add(d, offset, d); +} + +inline void MacroAssembler::andn(Register s1, RegisterOrConstant s2, Register d) { + if (s2.is_register()) andn(s1, s2.as_register(), d); + else andn(s1, s2.as_constant(), d); +} + +inline void MacroAssembler::btst( Register s1, Register s2 ) { andcc( s1, s2, G0 ); } +inline void MacroAssembler::btst( int simm13a, Register s ) { andcc( s, simm13a, G0 ); } + +inline void MacroAssembler::bset( Register s1, Register s2 ) { or3( s1, s2, s2 ); } +inline void MacroAssembler::bset( int simm13a, Register s ) { or3( s, simm13a, s ); } + +inline void MacroAssembler::bclr( Register s1, Register s2 ) { andn( s1, s2, s2 ); } /* NOTE(review): the destination s2 is the second andn operand here but the first in the immediate form below - confirm operand order */ +inline void MacroAssembler::bclr( int simm13a, Register s ) { andn( s, simm13a, s ); } + +inline void MacroAssembler::btog( Register s1, Register s2 ) { xor3( s1, s2, s2 ); } +inline void MacroAssembler::btog( int simm13a, Register s ) { xor3( s, simm13a, s ); } + +inline void MacroAssembler::clr( Register d ) { or3( G0, G0, d ); } + +inline void MacroAssembler::clrb( Register s1, Register s2) { stb( G0, s1, s2 ); } +inline void MacroAssembler::clrh( Register s1, Register s2) { sth( G0, s1, s2 ); } +inline void MacroAssembler::clr( Register s1, Register s2) { stw( G0, s1, s2 ); } +inline void MacroAssembler::clrx( Register s1, Register s2) { stx( G0, s1, s2 ); } + +inline void MacroAssembler::clrb( Register s1, int simm13a) { stb( G0, s1, simm13a); } +inline void MacroAssembler::clrh( Register s1, int simm13a) { sth( G0, s1, simm13a); } +inline void MacroAssembler::clr( Register s1, int simm13a) { stw( G0, s1, simm13a); } +inline void MacroAssembler::clrx( Register s1, int simm13a) { stx( G0, s1, simm13a); } + +inline void MacroAssembler::clruw( Register s, Register d ) { srl( s, G0, d); } +inline void MacroAssembler::clruwu( Register d ) { srl( d, G0, d); } + +// Make all 32 bit loads signed (ld is emitted as ldsw) so 64 bit registers maintain proper sign +inline void MacroAssembler::ld( Register s1, Register s2, Register d) { ldsw( s1, s2,
d); } +inline void MacroAssembler::ld( Register s1, int simm13a, Register d) { ldsw( s1, simm13a, d); } + +inline void MacroAssembler::ld(Register s1, ByteSize simm13a, Register d) { ldsw( s1, in_bytes(simm13a), d); } + +inline void MacroAssembler::ld( const Address& a, Register d, int offset) { /* with an index register, offset must be 0 (asserted); the Address loads below follow the same pattern */ + if (a.has_index()) { assert(offset == 0, ""); ld( a.base(), a.index(), d); } + else { ld( a.base(), a.disp() + offset, d); } +} + +inline void MacroAssembler::ldsb(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldsb(a.base(), a.index(), d); } + else { ldsb(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldsh(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldsh(a.base(), a.index(), d); } + else { ldsh(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldsw(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldsw(a.base(), a.index(), d); } + else { ldsw(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldub(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldub(a.base(), a.index(), d); } + else { ldub(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::lduh(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); lduh(a.base(), a.index(), d); } + else { lduh(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::lduw(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); lduw(a.base(), a.index(), d); } + else { lduw(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldd( const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldd( a.base(), a.index(), d); } + else { ldd( a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldx( const Address& a, Register d, int offset) { + if (a.has_index()) {
assert(offset == 0, ""); ldx( a.base(), a.index(), d); } + else { ldx( a.base(), a.disp() + offset, d); } +} + +inline void MacroAssembler::ldub(Register s1, RegisterOrConstant s2, Register d) { ldub(Address(s1, s2), d); } +inline void MacroAssembler::ldsb(Register s1, RegisterOrConstant s2, Register d) { ldsb(Address(s1, s2), d); } +inline void MacroAssembler::lduh(Register s1, RegisterOrConstant s2, Register d) { lduh(Address(s1, s2), d); } +inline void MacroAssembler::ldsh(Register s1, RegisterOrConstant s2, Register d) { ldsh(Address(s1, s2), d); } +inline void MacroAssembler::lduw(Register s1, RegisterOrConstant s2, Register d) { lduw(Address(s1, s2), d); } +inline void MacroAssembler::ldsw(Register s1, RegisterOrConstant s2, Register d) { ldsw(Address(s1, s2), d); } +inline void MacroAssembler::ldx( Register s1, RegisterOrConstant s2, Register d) { ldx( Address(s1, s2), d); } +inline void MacroAssembler::ld( Register s1, RegisterOrConstant s2, Register d) { ld( Address(s1, s2), d); } +inline void MacroAssembler::ldd( Register s1, RegisterOrConstant s2, Register d) { ldd( Address(s1, s2), d); } + +inline void MacroAssembler::ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d) { + if (s2.is_register()) ldf(w, s1, s2.as_register(), d); + else ldf(w, s1, s2.as_constant(), d); +} + +inline void MacroAssembler::ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset) { + relocate(a.rspec(offset)); /* relocation is requested explicitly here; the integer Address loads above do not call relocate */ + if (a.has_index()) { + assert(offset == 0, ""); + ldf(w, a.base(), a.index(), d); + } else { + ldf(w, a.base(), a.disp() + offset, d); + } +} + +inline void MacroAssembler::lduwl(Register s1, Register s2, Register d) { lduwa(s1, s2, ASI_PRIMARY_LITTLE, d); } +inline void MacroAssembler::ldswl(Register s1, Register s2, Register d) { ldswa(s1, s2, ASI_PRIMARY_LITTLE, d);} +inline void MacroAssembler::ldxl( Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); } +inline void
MacroAssembler::ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); } + +// returns true if membar generates anything; obviously this code should mirror +// membar below. +inline bool MacroAssembler::membar_has_effect( Membar_mask_bits const7a ) { + const Membar_mask_bits effective_mask = + Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore)); + return (effective_mask != 0); +} + +inline void MacroAssembler::membar( Membar_mask_bits const7a ) { + // Weakened for current Sparcs and TSO. See the v9 manual, sections 8.4.3, + // 8.4.4.3, a.31 and a.50. + // Under TSO, setting bit 3, 2, or 0 is redundant, so the only value + // of the mmask subfield of const7a that does anything that isn't done + // implicitly is StoreLoad. + const Membar_mask_bits effective_mask = + Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore)); + if (effective_mask != 0) { + Assembler::membar(effective_mask); + } +} + +inline void MacroAssembler::mov(Register s, Register d) { + if (s != d) { + or3(G0, s, d); + } else { + assert_not_delayed(); // Put something useful in the delay slot!
+ } +} + +inline void MacroAssembler::mov_or_nop(Register s, Register d) { + if (s != d) { + or3(G0, s, d); + } else { + nop(); + } +} + +inline void MacroAssembler::mov( int simm13a, Register d) { or3( G0, simm13a, d); } + +inline void MacroAssembler::prefetch(const Address& a, PrefetchFcn f, int offset) { + relocate(a.rspec(offset)); + assert(!a.has_index(), ""); + prefetch(a.base(), a.disp() + offset, f); +} + +inline void MacroAssembler::st(Register d, Register s1, Register s2) { stw(d, s1, s2); } +inline void MacroAssembler::st(Register d, Register s1, int simm13a) { stw(d, s1, simm13a); } + +inline void MacroAssembler::st(Register d, Register s1, ByteSize simm13a) { stw(d, s1, in_bytes(simm13a)); } + +inline void MacroAssembler::st(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); st( d, a.base(), a.index() ); } + else { st( d, a.base(), a.disp() + offset); } +} + +inline void MacroAssembler::stb(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); stb(d, a.base(), a.index() ); } + else { stb(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::sth(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); sth(d, a.base(), a.index() ); } + else { sth(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::stw(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); stw(d, a.base(), a.index() ); } + else { stw(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::std(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); std(d, a.base(), a.index() ); } + else { std(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::stx(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); stx(d, a.base(), a.index() ); } + else { stx(d, a.base(), a.disp() + offset); } +} + +inline void 
MacroAssembler::stb(Register d, Register s1, RegisterOrConstant s2) { stb(d, Address(s1, s2)); } +inline void MacroAssembler::sth(Register d, Register s1, RegisterOrConstant s2) { sth(d, Address(s1, s2)); } +inline void MacroAssembler::stw(Register d, Register s1, RegisterOrConstant s2) { stw(d, Address(s1, s2)); } +inline void MacroAssembler::stx(Register d, Register s1, RegisterOrConstant s2) { stx(d, Address(s1, s2)); } +inline void MacroAssembler::std(Register d, Register s1, RegisterOrConstant s2) { std(d, Address(s1, s2)); } +inline void MacroAssembler::st( Register d, Register s1, RegisterOrConstant s2) { st( d, Address(s1, s2)); } + +inline void MacroAssembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2) { + if (s2.is_register()) stf(w, d, s1, s2.as_register()); + else stf(w, d, s1, s2.as_constant()); +} + +inline void MacroAssembler::stf(FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset) { + relocate(a.rspec(offset)); + if (a.has_index()) { assert(offset == 0, ""); stf(w, d, a.base(), a.index() ); } + else { stf(w, d, a.base(), a.disp() + offset); } +} + +inline void MacroAssembler::sub(Register s1, RegisterOrConstant s2, Register d, int offset) { + if (s2.is_register()) sub(s1, s2.as_register(), d); + else { sub(s1, s2.as_constant() + offset, d); offset = 0; } + if (offset != 0) sub(d, offset, d); +} + +inline void MacroAssembler::swap(const Address& a, Register d, int offset) { + relocate(a.rspec(offset)); + if (a.has_index()) { assert(offset == 0, ""); swap(a.base(), a.index(), d ); } + else { swap(a.base(), a.disp() + offset, d); } +} +#endif // CPU_SPARC_MACROASSEMBLER_SPARC_INLINE_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp b/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp --- a/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ 
b/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp 2023-04-16 11:42:11.068242242 +0000 @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" + +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/memset_with_concurrent_readers.hpp" +#include "runtime/prefetch.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// An implementation of memset, for use when there may be concurrent +// readers of the region being stored into. +// +// We can't use the standard library memset if it is implemented using +// block initializing stores. Doing so can result in concurrent readers +// seeing spurious zeros. 
+// +// We can't use the obvious C/C++ for-loop, because the compiler may +// recognize the idiomatic loop and optimize it into a call to the +// standard library memset; we've seen exactly this happen with, for +// example, Solaris Studio 12.3. Hence the use of inline assembly +// code, hiding loops from the compiler's optimizer. +// +// We don't attempt to use the standard library memset when it is safe +// to do so. We could conservatively do so by detecting the presence +// of block initializing stores (VM_Version::has_blk_init()), but the +// implementation provided here should be sufficient. + +inline void fill_subword(void* start, void* end, int value) { + STATIC_ASSERT(BytesPerWord == 8); + assert(pointer_delta(end, start, 1) < (size_t)BytesPerWord, "precondition"); + // Dispatch on (end - start). + void* pc; + __asm__ volatile( + // offset := (7 - (end - start)) + 3 + // 3 instructions from rdpc to DISPATCH + " sub %[offset], %[end], %[offset]\n\t" // offset := start - end + " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4 + " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size + " rd %%pc, %[pc]\n\t" // dispatch on scaled offset + " jmpl %[pc]+%[offset], %%g0\n\t" + " nop\n\t" + // DISPATCH: no direct reference, but without it the store block may be elided. + "1:\n\t" + " stb %[value], [%[end]-7]\n\t" // end[-7] = value + " stb %[value], [%[end]-6]\n\t" + " stb %[value], [%[end]-5]\n\t" + " stb %[value], [%[end]-4]\n\t" + " stb %[value], [%[end]-3]\n\t" + " stb %[value], [%[end]-2]\n\t" + " stb %[value], [%[end]-1]\n\t" // end[-1] = value + : /* only temporaries/overwritten outputs */ + [pc] "=&r" (pc), // temp + [offset] "+&r" (start) + : [end] "r" (end), + [value] "r" (value) + : "memory"); +} + +void memset_with_concurrent_readers(void* to, int value, size_t size) { + Prefetch::write(to, 0); + void* end = static_cast(to) + size; + if (size >= (size_t)BytesPerWord) { + // Fill any partial word prefix. 
+ uintx* aligned_to = static_cast(align_up(to, BytesPerWord)); + fill_subword(to, aligned_to, value); + + // Compute fill word. + STATIC_ASSERT(BitsPerByte == 8); + STATIC_ASSERT(BitsPerWord == 64); + uintx xvalue = value & 0xff; + xvalue |= (xvalue << 8); + xvalue |= (xvalue << 16); + xvalue |= (xvalue << 32); + + uintx* aligned_end = static_cast(align_down(end, BytesPerWord)); + assert(aligned_to <= aligned_end, "invariant"); + + // for ( ; aligned_to < aligned_end; ++aligned_to) { + // *aligned_to = xvalue; + // } + uintptr_t temp; + __asm__ volatile( + // Unroll loop x8. + " sub %[aend], %[ato], %[temp]\n\t" + " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words + " ba %%xcc, 2f\n\t" // goto TEST always + " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words + // LOOP: + "1:\n\t" // unrolled x8 store loop top + " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to + " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented + " stx %[xvalue], [%[ato]-56]\n\t" + " stx %[xvalue], [%[ato]-48]\n\t" + " stx %[xvalue], [%[ato]-40]\n\t" + " stx %[xvalue], [%[ato]-32]\n\t" + " stx %[xvalue], [%[ato]-24]\n\t" + " stx %[xvalue], [%[ato]-16]\n\t" + " stx %[xvalue], [%[ato]-8]\n\t" + // TEST: + "2:\n\t" + " bgu,a %%xcc, 1b\n\t" // goto LOOP if more than 7 words remaining + " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration + // Fill remaining < 8 full words. + // Dispatch on (aligned_end - aligned_to). + // offset := (7 - (aligned_end - aligned_to)) + 3 + // 3 instructions from rdpc to DISPATCH + " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end + " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4 + " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size + " rd %%pc, %[temp]\n\t" // dispatch on scaled offset + " jmpl %[temp]+%[ato], %%g0\n\t" + " nop\n\t" + // DISPATCH: no direct reference, but without it the store block may be elided. 
+ "3:\n\t" + " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue + " stx %[xvalue], [%[aend]-48]\n\t" + " stx %[xvalue], [%[aend]-40]\n\t" + " stx %[xvalue], [%[aend]-32]\n\t" + " stx %[xvalue], [%[aend]-24]\n\t" + " stx %[xvalue], [%[aend]-16]\n\t" + " stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue + : /* only temporaries/overwritten outputs */ + [temp] "=&r" (temp), + [ato] "+&r" (aligned_to) + : [aend] "r" (aligned_end), + [xvalue] "r" (xvalue) + : "cc", "memory"); + to = aligned_end; // setup for suffix + } + // Fill any partial word suffix. Also the prefix if size < BytesPerWord. + fill_subword(to, end, value); +} diff -ur --new-file a/src/hotspot/cpu/sparc/methodHandles_sparc.cpp b/src/hotspot/cpu/sparc/methodHandles_sparc.cpp --- a/src/hotspot/cpu/sparc/methodHandles_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/methodHandles_sparc.cpp 2023-04-16 11:42:11.068545848 +0000 @@ -0,0 +1,630 @@ +/* + * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "classfile/vmClasses.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interp_masm.hpp" +#include "logging/log.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/preserveException.hpp" + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant. 
+static RegisterOrConstant constant(int value) {  // explicit int -> RegisterOrConstant conversion
+  return RegisterOrConstant(value);
+}
+
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg) {
+  if (VerifyMethodHandles)  // when the flag is set, first emit verify_klass against java_lang_Class
+    verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), temp_reg, temp2_reg,
+                 "MH argument is a Class");
+  __ ld_ptr(Address(klass_reg, java_lang_Class::klass_offset()), klass_reg);  // klass_reg := [klass_reg + klass_offset()]; the input value is overwritten
+}
+
+#ifdef ASSERT
+static int check_nonzero(const char* xname, int x) {  // returns x unchanged after asserting that it is nonzero
+  assert(x != 0, "%s should be nonzero", xname);
+  return x;
+}
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
+
+#ifdef ASSERT
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                 Register obj_reg, vmClassID klass_id,
+                                 Register temp_reg, Register temp2_reg,
+                                 const char* error_message) {
+  InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id);
+  Klass* klass = vmClasses::klass_at(klass_id);
+  bool did_save = false;  // becomes true once save_frame_and_mov has been emitted below
+  if (temp_reg == noreg || temp2_reg == noreg) {  // a temp is missing: use L1/L2 and move obj_reg into L0
+    temp_reg = L1;
+    temp2_reg = L2;
+    __ save_frame_and_mov(0, obj_reg, L0);
+    obj_reg = L0;
+    did_save = true;
+  }
+  Label L_ok, L_bad;
+  BLOCK_COMMENT("verify_klass {");
+  __ verify_oop(obj_reg);
+  __ br_null_short(obj_reg, Assembler::pn, L_bad);  // null object: branch to the failure path
+  __ load_klass(obj_reg, temp_reg);
+  __ set(ExternalAddress((Metadata**)klass_addr), temp2_reg);
+  __ ld_ptr(Address(temp2_reg, 0), temp2_reg);  // temp2_reg := *klass_addr
+  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok);  // first test: loaded klass == *klass_addr
+  intptr_t super_check_offset = klass->super_check_offset();
+  __ ld_ptr(Address(temp_reg, super_check_offset), temp_reg);  // temp_reg := word at super_check_offset within the loaded klass
+  __ set(ExternalAddress((Metadata**)klass_addr), temp2_reg);
+  __ ld_ptr(Address(temp2_reg, 0), temp2_reg);
+  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok);  // second test: that word == *klass_addr
+  __ BIND(L_bad);
+  if (did_save) __ restore();  // emit the matching restore before stopping
+  __ STOP(error_message);
+  __ BIND(L_ok);
+  if (did_save) __ restore();
+  BLOCK_COMMENT("} verify_klass");
+}
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ lduw(Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset())), temp); + __ srl( temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT, temp); + __ and3(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK, temp); + __ cmp_and_br_short(temp, ref_kind, Assembler::equal, Assembler::pt, L); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ STOP(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif // ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp, + bool for_compiler_entry) { + Label L_no_such_method; + assert(method == G5_method, "interpreter calling convention"); + assert_different_registers(method, target, temp); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. 
+ __ verify_thread(); + const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset()); + __ ld(interp_only, temp); + __ cmp_and_br_short(temp, 0, Assembler::zero, Assembler::pt, run_compiled_code); + // Null method test is replicated below in compiled case, + // it might be able to address across the verify_thread() + __ br_null_short(G5_method, Assembler::pn, L_no_such_method); + __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), target); + __ jmp(target, 0); + __ delayed()->nop(); + __ BIND(run_compiled_code); + // Note: we could fill some delay slots here, but + // it doesn't matter, since this is interpreter code. + } + + // Compiled case, either static or fall-through from runtime conditional + __ br_null_short(G5_method, Assembler::pn, L_no_such_method); + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld_ptr(G5_method, in_bytes(entry_offset), target); + __ jmp(target, 0); + __ delayed()->nop(); + + __ bind(L_no_such_method); + AddressLiteral ame(StubRoutines::throw_AbstractMethodError_entry()); + __ jump_to(ame, temp); + __ delayed()->nop(); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, Register temp3, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. 
+ assert_different_registers(recv, method_temp, temp2); // temp3 is only passed on + assert(method_temp == G5_method, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset()), method_temp, temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset()), method_temp, temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset()), method_temp, temp2); + __ verify_oop(method_temp); + __ ld_ptr(Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), method_temp); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld_ptr(method_temp, in_bytes(Method::const_offset()), temp2); + __ load_sized_value(Address(temp2, ConstMethod::size_of_parameters_offset()), + temp2, + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + __ ld_ptr(__ argument_address(temp2, temp2, -1), temp2); + __ cmp_and_br_short(temp2, recv, Assembler::equal, Assembler::pt, L); + __ STOP("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, temp3, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. 
+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. + __ should_not_reach_here(); // empty stubs make SG sick + return NULL; + } + + // No need in interpreter entry for linkToNative for now. + // Interpreter calls compiled entry through i2c. + if (iid == vmIntrinsics::_linkToNative) { + __ should_not_reach_here(); // Empty stubs make SG sick. + return NULL; + } + + // I5_savedSP/O5_savedSP: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // G5_method: Method* + // G4 (Gargs): incoming argument list (must preserve) + // O0: used as temp to hold mh or receiver + // O1, O4: garbage temps, blown away + Register O1_scratch = O1; + Register O4_param_size = O4; // size of parameters + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ lduh(Address(G5_method, Method::intrinsic_id_offset_in_bytes()), O1_scratch); + __ cmp_and_br_short(O1_scratch, (int) iid, Assembler::equal, Assembler::pt, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ STOP("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. 
+ Address O4_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld_ptr(G5_method, in_bytes(Method::const_offset()), O4_param_size); + __ load_sized_value(Address(O4_param_size, ConstMethod::size_of_parameters_offset()), + O4_param_size, + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + O4_first_arg_addr = __ argument_address(O4_param_size, O4_param_size, -1); + } else { + DEBUG_ONLY(O4_param_size = noreg); + } + + Register O0_mh = noreg; + if (!is_signature_polymorphic_static(iid)) { + __ ld_ptr(O4_first_arg_addr, O0_mh = O0); + DEBUG_ONLY(O4_param_size = noreg); + } + + // O4_first_arg_addr is live! + + if (log_is_enabled(Info, methodhandles)) { + if (O0_mh != noreg) + __ mov(O0_mh, G3_method_handle); // make stub happy + trace_method_handle_interpreter_entry(_masm, iid); + } + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, O0_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register O0_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
+ __ ld_ptr(O4_first_arg_addr, O0_recv = O0); + DEBUG_ONLY(O4_param_size = noreg); + } + Register G5_member = G5_method; // MemberName ptr; incoming method ptr is dead now + __ ld_ptr(__ argument_address(constant(0)), G5_member); + __ add(Gargs, Interpreter::stackElementSize, Gargs); + generate_method_handle_dispatch(_masm, iid, O0_recv, G5_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register temp1 = (for_compiler_entry ? G1_scratch : O1); + Register temp2 = (for_compiler_entry ? G3_scratch : O2); + Register temp3 = (for_compiler_entry ? G4_scratch : O3); + Register temp4 = (for_compiler_entry ? noreg : O4); + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : O0), "only valid assignment"); + assert_different_registers(temp1, O0, O1, O2, O3, O4, O5); + assert_different_registers(temp2, O0, O1, O2, O3, O4, O5); + assert_different_registers(temp3, O0, O1, O2, O3, O4, O5); + assert_different_registers(temp4, O0, O1, O2, O3, O4, O5); + } else { + assert_different_registers(temp1, temp2, temp3, temp4, O5_savedSP); // don't trash lastSP + } + if (receiver_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg); + if (member_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, member_reg); + + if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { + if (iid == vmIntrinsics::_linkToNative) { + assert(for_compiler_entry, "only compiler entry is supported"); + } + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, G5_method, temp1, temp2, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. 
+ if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, VM_CLASS_ID(MemberName_klass), + temp1, temp2, + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); + Address vmtarget_method( G5_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(receiver_reg, temp1_recv_klass); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(receiver_reg, temp1_recv_klass); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(member_clazz, temp2_defc, temp3); + load_klass_from_Class(_masm, temp2_defc, temp3, temp4); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok); + // If we get here, the type check failed! 
+ __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + // O5_savedSP - interpreter linkage (if interpreted) + // O0..O5 - compiler arguments (if compiled) + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp2); + } + __ load_heap_oop(member_vmtarget, G5_method, temp3); + __ ld_ptr(vmtarget_method, G5_method); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp2); + } + __ load_heap_oop(member_vmtarget, G5_method, temp3); + __ ld_ptr(vmtarget_method, G5_method); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp2); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ld_ptr(member_vmindex, temp2_index); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ cmp_and_br_short(temp2_index, (int) 0, Assembler::greaterEqual, Assembler::pn, L_index_ok); + __ STOP("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
+ + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, G5_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp2); + } + + Register temp2_intf = temp2; + __ load_heap_oop(member_clazz, temp2_intf, temp3); + load_klass_from_Class(_masm, temp2_intf, temp3, temp4); + __ verify_klass_ptr(temp2_intf); + + Register G5_index = G5_method; + __ ld_ptr(member_vmindex, G5_index); + if (VerifyMethodHandles) { + Label L; + __ cmp_and_br_short(G5_index, 0, Assembler::greaterEqual, Assembler::pt, L); + __ STOP("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp2_intf, + // note: next two args must be the same: + G5_index, G5_method, + temp3, temp4, + L_incompatible_class_change_error); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); + break; + } + + // Live at this point: + // G5_method + // O5_savedSP (if interpreted) + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that rcx_recv be shifted out. 
+    __ verify_method_ptr(G5_method);
+    jump_from_method_handle(_masm, G5_method, temp1, temp2, for_compiler_entry);
+
+    if (iid == vmIntrinsics::_linkToInterface) {
+      __ BIND(L_incompatible_class_change_error);
+      AddressLiteral icce(StubRoutines::throw_IncompatibleClassChangeError_entry());
+      __ jump_to(icce, temp1);
+      __ delayed()->nop();
+    }
+  }
+}
+
+#ifndef PRODUCT
+void trace_method_handle_stub(const char* adaptername,   // debug-only tracer called from generated code
+                              oopDesc* mh,
+                              intptr_t* saved_sp,
+                              intptr_t* args,
+                              intptr_t* tracing_fp) {   // compared (after adding STACK_BIAS) against frame fps below
+  bool has_mh = (strstr(adaptername, "/static") == NULL &&
+                 strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH
+  const char* mh_reg_name = has_mh ? "G3_mh" : "G3";
+  tty->print_cr("MH %s %s=" INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT,
+                adaptername, mh_reg_name,
+                p2i(mh), p2i(saved_sp), p2i(args));
+
+  if (Verbose) {
+    // dumping last frame with frame::describe
+
+    JavaThread* p = JavaThread::active();   // handed to values.print() at the end
+
+    ResourceMark rm;
+    // may not be needed but safer and inexpensive here
+    PreserveExceptionMark pem(Thread::current());
+    FrameValues values;
+
+    // Note: We want to allow trace_method_handle from any call site.
+    // While trace_method_handle creates a frame, it may be entered
+    // without a valid return PC in O7 (e.g. not just after a call).
+    // Walking that frame could lead to failures due to that invalid PC.
+    // => carefully detect that frame when doing the stack walking
+
+    // walk up to the right frame using the "tracing_fp" argument
+    intptr_t* cur_sp = StubRoutines::Sparc::flush_callers_register_windows_func()();
+    frame cur_frame(cur_sp, frame::unpatchable, NULL);
+
+    while (cur_frame.fp() != (intptr_t *)(STACK_BIAS+(uintptr_t)tracing_fp)) {   // no other exit: assumes tracing_fp is on this stack - TODO confirm
+      cur_frame = os::get_sender_for_C_frame(&cur_frame);
+    }
+
+    // safely create a frame and call frame::describe
+    intptr_t *dump_sp = cur_frame.sender_sp();
+    intptr_t *dump_fp = cur_frame.link();
+
+    bool walkable = has_mh; // whether the traced frame should be walkable
+
+    // the sender for cur_frame is the caller of trace_method_handle
+    if (walkable) {
+      // The previous definition of walkable may have to be refined
+      // if new call sites cause the next frame constructor to start
+      // failing. Alternatively, frame constructors could be
+      // modified to support the current or future non walkable
+      // frames (but this is more intrusive and is not considered as
+      // part of this RFE, which will instead use a simpler output).
+      frame dump_frame = frame(dump_sp,
+                               cur_frame.sp(), // younger_sp
+                               false); // no adaptation
+      dump_frame.describe(values, 1);
+    } else {
+      // Robust dump for frames which cannot be constructed from sp/younger_sp
+      // Add descriptions without building a Java frame to avoid issues
+      values.describe(-1, dump_fp, "fp for #1 ");
+      values.describe(-1, dump_sp, "sp");
+    }
+
+    bool has_args = has_mh; // whether Gargs is meaningful
+
+    // mark args, if seems valid (may not be valid for some adapters)
+    if (has_args) {
+      if ((args >= dump_sp) && (args < dump_fp)) {   // only annotate when args lies inside the dumped frame
+        values.describe(-1, args, "*G4_args");
+      }
+    }
+
+    // mark saved_sp, if seems valid (may not be valid for some adapters)
+    intptr_t *unbiased_sp = (intptr_t *)(STACK_BIAS+(uintptr_t)saved_sp);
+    const int ARG_LIMIT = 255, SLOP = 45, UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP);   // 300-word tolerance below dump_sp
+    if ((unbiased_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (unbiased_sp < dump_fp)) {
+      values.describe(-1, unbiased_sp, "*saved_sp+STACK_BIAS");
+    }
+
+    // Note: the unextended_sp may not be correct
+    tty->print_cr(" stack layout:");
+    values.print(p);
+    if (has_mh && oopDesc::is_oop(mh)) {
+      mh->print();
+      if (java_lang_invoke_MethodHandle::is_instance(mh)) {
+        java_lang_invoke_MethodHandle::form(mh)->print();
+      }
+    }
+  }
+}
+
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
+  if (!log_is_enabled(Info, methodhandles)) return;   // emits no code unless methodhandles logging is on at Info
+  BLOCK_COMMENT("trace_method_handle {");
+  // save: Gargs, O5_savedSP
+  __ save_frame(16); // need space for saving required FPU state
+
+  __ set((intptr_t) adaptername, O0);   // O0..O4: presumably the five arguments of trace_method_handle_stub, in order
+  __ mov(G3_method_handle, O1);
+  __ mov(I5_savedSP, O2);
+  __ mov(Gargs, O3);
+  __ mov(I6, O4); // frame identifier for safe stack walking
+
+  // Save scratched registers that might be needed. Robustness is more
+  // important than optimizing the saves for this debug only code.
+
+  // save FP result, valid at some call sites (adapter_opt_return_float, ...)
+  Address d_save(FP, -sizeof(jdouble) + STACK_BIAS);
+  __ stf(FloatRegisterImpl::D, Ftos_d, d_save);
+  // Safely save all globals but G2 (handled by call_VM_leaf) and G7
+  // (OS reserved).
+  __ mov(G3_method_handle, L3);
+  __ mov(Gargs, L4);
+  __ mov(G5_method_type, L5);
+  __ mov(G6, L6);
+  __ mov(G1, L1);
+
+  __ call_VM_leaf(L2 /* for G2 */, CAST_FROM_FN_PTR(address, trace_method_handle_stub));
+
+  __ mov(L3, G3_method_handle);   // restore the globals saved above, in the same order
+  __ mov(L4, Gargs);
+  __ mov(L5, G5_method_type);
+  __ mov(L6, G6);
+  __ mov(L1, G1);
+  __ ldf(FloatRegisterImpl::D, d_save, Ftos_d);   // reload the FP result spilled to d_save
+
+  __ restore();
+  BLOCK_COMMENT("} trace_method_handle");
+}
+#endif // PRODUCT
diff -ur --new-file a/src/hotspot/cpu/sparc/methodHandles_sparc.hpp b/src/hotspot/cpu/sparc/methodHandles_sparc.hpp
--- a/src/hotspot/cpu/sparc/methodHandles_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/methodHandles_sparc.hpp 2023-04-16 11:42:11.068664022 +0000
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+// Adapters
+enum /* platform_dependent_constants */ {
+  adapter_code_size = 35000 DEBUG_ONLY(+ 50000)   // 35000, plus 50000 when DEBUG_ONLY(...) expands its argument
+};
+
+// Additional helper methods for MethodHandles code generation:
+public:
+  static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg);
+
+  static void verify_klass(MacroAssembler* _masm,
+                           Register obj_reg, vmClassID klass_id,
+                           Register temp_reg, Register temp2_reg,
+                           const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+  static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,   // thin wrapper around verify_klass
+                                   Register temp_reg, Register temp2_reg) {
+    verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle),
+                 temp_reg, temp2_reg,
+                 "reference is a MH");
+  }
+
+  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
+  // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+  // Takes care of special dispatch from single stepping too.
+  static void jump_from_method_handle(MacroAssembler* _masm, Register method,
+                                      Register temp, Register temp2,
+                                      bool for_compiler_entry);
+
+  static void jump_to_lambda_form(MacroAssembler* _masm,
+                                  Register recv, Register method_temp,
+                                  Register temp2, Register temp3,
+                                  bool for_compiler_entry);
diff -ur --new-file a/src/hotspot/cpu/sparc/nativeInst_sparc.cpp b/src/hotspot/cpu/sparc/nativeInst_sparc.cpp
--- a/src/hotspot/cpu/sparc/nativeInst_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/nativeInst_sparc.cpp 2023-04-16 11:42:11.069031480 +0000
@@ -0,0 +1,912 @@
+/*
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/codeCache.hpp" +#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_sparc.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +void NativeInstruction::set_data64_sethi(address instaddr, intptr_t x) { + ResourceMark rm; + CodeBuffer buf(instaddr, 10 * BytesPerInstWord ); + MacroAssembler* _masm = new MacroAssembler(&buf); + Register destreg; + + destreg = inv_rd(*(unsigned int *)instaddr); + // Generate a the new sequence + _masm->patchable_sethi(x, destreg); + ICache::invalidate_range(instaddr, 7 * BytesPerInstWord); +} + +void NativeInstruction::verify_data64_sethi(address instaddr, intptr_t x) { + ResourceMark rm; + unsigned char buffer[10 * BytesPerInstWord]; + CodeBuffer 
buf(buffer, 10 * BytesPerInstWord); + MacroAssembler masm(&buf); + + Register destreg = inv_rd(*(unsigned int *)instaddr); + // Generate the proper sequence into a temporary buffer and compare + // it with the original sequence. + masm.patchable_sethi(x, destreg); + int len = buffer - masm.pc(); + for (int i = 0; i < len; i++) { + guarantee(instaddr[i] == buffer[i], "instructions must match"); + } +} + +void NativeInstruction::verify() { + // make sure code pattern is actually an instruction address + address addr = addr_at(0); + if (addr == 0 || ((intptr_t)addr & 3) != 0) { + fatal("not an instruction address"); + } +} + +void NativeInstruction::print() { + tty->print_cr(INTPTR_FORMAT ": 0x%x", p2i(addr_at(0)), long_at(0)); +} + +void NativeInstruction::set_long_at(int offset, int i) { + address addr = addr_at(offset); + *(int*)addr = i; + ICache::invalidate_word(addr); +} + +void NativeInstruction::set_jlong_at(int offset, jlong i) { + address addr = addr_at(offset); + *(jlong*)addr = i; + // Don't need to invalidate 2 words here, because + // the flush instruction operates on doublewords. + ICache::invalidate_word(addr); +} + +void NativeInstruction::set_addr_at(int offset, address x) { + address addr = addr_at(offset); + assert( ((intptr_t)addr & (wordSize-1)) == 0, "set_addr_at bad address alignment"); + *(uintptr_t*)addr = (uintptr_t)x; + // Don't need to invalidate 2 words here in the 64-bit case, + // because the flush instruction operates on doublewords. 
+ ICache::invalidate_word(addr); + // The Intel code has this assertion for NativeCall::set_destination, + // NativeMovConstReg::set_data, NativeMovRegMem::set_offset, + // NativeJump::set_jump_destination, and NativePushImm32::set_data + //assert (Patching_lock->owned_by_self(), "must hold lock to patch instruction") +} + +bool NativeInstruction::is_zero_test(Register ®) { + int x = long_at(0); + Assembler::op3s temp = (Assembler::op3s) (Assembler::sub_op3 | Assembler::cc_bit_op3); + if (is_op3(x, temp, Assembler::arith_op) && + inv_immed(x) && inv_rd(x) == G0) { + if (inv_rs1(x) == G0) { + reg = inv_rs2(x); + return true; + } else if (inv_rs2(x) == G0) { + reg = inv_rs1(x); + return true; + } + } + return false; +} + +bool NativeInstruction::is_load_store_with_small_offset(Register reg) { + int x = long_at(0); + if (is_op(x, Assembler::ldst_op) && + inv_rs1(x) == reg && inv_immed(x)) { + return true; + } + return false; +} + +void NativeCall::verify() { + NativeInstruction::verify(); + // make sure code pattern is actually a call instruction + int x = long_at(0); + if (!is_op(x, Assembler::call_op)) { + fatal("not a call: 0x%x @ " INTPTR_FORMAT, x, p2i(instruction_address())); + } +} + +void NativeCall::print() { + tty->print_cr(INTPTR_FORMAT ": call " INTPTR_FORMAT, p2i(instruction_address()), p2i(destination())); +} + + +// MT-safe patching of a call instruction (and following word). +// First patches the second word, and then atomicly replaces +// the first word with the first new instruction word. +// Other processors might briefly see the old first word +// followed by the new second word. This is OK if the old +// second word is harmless, and the new second word may be +// harmlessly executed in the delay slot of the call. 
+void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) {   // copies two instruction words from code_buffer over the call
+  assert(Patching_lock->is_locked() ||
+         SafepointSynchronize::is_at_safepoint(), "concurrent code patching");
+  assert (instr_addr != NULL, "illegal address for code patching");
+  NativeCall* n_call = nativeCall_at (instr_addr); // checking that it is a call
+  assert(NativeCall::instruction_size == 8, "wrong instruction size; must be 8");
+  int i0 = ((int*)code_buffer)[0];   // new first instruction word
+  int i1 = ((int*)code_buffer)[1];   // new second instruction word
+  int* contention_addr = (int*) n_call->addr_at(1*BytesPerInstWord);   // second word of the existing call sequence
+  assert(inv_op(*contention_addr) == Assembler::arith_op ||
+         *contention_addr == nop_instruction(),
+         "must not interfere with original call");
+  // The set_long_at calls do the ICacheInvalidate so we just need to do them in reverse order
+  n_call->set_long_at(1*BytesPerInstWord, i1);   // second word first ...
+  n_call->set_long_at(0*BytesPerInstWord, i0);   // ... then the first word; do not reorder
+  // NOTE: It is possible that another thread T will execute
+  // only the second patched word.
+  // In other words, since the original instruction is this
+  // call patching_stub; nop (NativeCall)
+  // and the new sequence from the buffer is this:
+  // sethi %hi(K), %r; add %r, %lo(K), %r (NativeMovConstReg)
+  // what T will execute is this:
+  // call patching_stub; add %r, %lo(K), %r
+  // thereby putting garbage into %r before calling the patching stub.
+  // This is OK, because the patching stub ignores the value of %r.
+
+  // Make sure the first-patched instruction, which may co-exist
+  // briefly with the call, will do something harmless.
+  assert(inv_op(*contention_addr) == Assembler::arith_op ||   // same check as before the stores, now applied to the new second word
+         *contention_addr == nop_instruction(),
+         "must not interfere with original call");
+}
+
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times. Thus, the displacement field must be
+// instruction-word-aligned. This is always true on SPARC.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+void NativeCall::set_destination_mt_safe(address dest) {
+  assert((Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) ||
+         CompiledICLocker::is_safe(addr_at(0)),
+         "concurrent code patching");
+  // set_destination uses set_long_at which does the ICache::invalidate
+  set_destination(dest);
+}
+
+// Code for unit testing implementation of NativeCall class
+void NativeCall::test() {   // body is compiled only under ASSERT
+#ifdef ASSERT
+  ResourceMark rm;
+  CodeBuffer cb("test", 100, 100);
+  MacroAssembler* a = new MacroAssembler(&cb);
+  NativeCall *nc;
+  uint idx;
+  int offsets[] = {   // byte offsets from cb.insts_begin() used as call destinations
+    0x0,
+    (int)0xfffffff0,
+    (int)0x7ffffff0,
+    (int)0x80000000,
+    0x20,
+    0x4000,
+  };
+
+  VM_Version::allow_all();
+
+  a->call( a->pc(), relocInfo::none );
+  a->delayed()->nop();
+  nc = nativeCall_at( cb.insts_begin() );
+  nc->print();
+
+  nc = nativeCall_overwriting_at( nc->next_instruction_address() );
+  for (idx = 0; idx < ARRAY_SIZE(offsets); idx++) {
+    nc->set_destination( cb.insts_begin() + offsets[idx] );
+    assert(nc->destination() == (cb.insts_begin() + offsets[idx]), "check unit test");   // round-trip check
+    nc->print();
+  }
+
+  nc = nativeCall_before( cb.insts_begin() + 8 );
+  nc->print();
+
+  VM_Version::revert();
+#endif
+}
+// End code for unit testing implementation of NativeCall class
+
+//-------------------------------------------------------------------
+
+void NativeFarCall::set_destination(address dest) {
+  // Address materialized in the instruction stream, so nothing to do.
+  return;   // the rest of the body is disabled by #if 0
+#if 0 // What we'd do if we really did want to change the destination
+  if (destination() == dest) {
+    return;
+  }
+  ResourceMark rm;
+  CodeBuffer buf(addr_at(0), instruction_size + 1);
+  MacroAssembler* _masm = new MacroAssembler(&buf);
+  // Generate the new sequence
+  AddressLiteral(dest);   // NOTE(review): result unused; jumpl_to below takes dest directly - revisit if this code is ever enabled
+  _masm->jumpl_to(dest, O7, O7);
+  ICache::invalidate_range(addr_at(0), instruction_size );
+#endif
+}
+
+void NativeFarCall::verify() {
+  // make sure code pattern is actually a jumpl_to instruction
+  assert((int)instruction_size == (int)NativeJump::instruction_size, "same as jump_to");
+  assert((int)jmpl_offset == (int)NativeMovConstReg::add_offset, "sethi size ok");
+  nativeJump_at(addr_at(0))->verify();   // delegates the pattern check to NativeJump::verify
+}
+
+bool NativeFarCall::is_call_at(address instr) {
+  return nativeInstruction_at(instr)->is_sethi();   // only the first instruction is inspected
+}
+
+void NativeFarCall::print() {
+  tty->print_cr(INTPTR_FORMAT ": call " INTPTR_FORMAT, p2i(instruction_address()), p2i(destination()));
+}
+
+bool NativeFarCall::destination_is_compiled_verified_entry_point() {
+  nmethod* callee = CodeCache::find_nmethod(destination());
+  if (callee == NULL) {   // destination is not inside any nmethod
+    return false;
+  } else {
+    return destination() == callee->verified_entry_point();
+  }
+}
+
+// MT-safe patching of a far call.
+void NativeFarCall::replace_mt_safe(address instr_addr, address code_buffer) {
+  Unimplemented();
+}
+
+// Code for unit testing implementation of NativeFarCall class
+void NativeFarCall::test() {
+  Unimplemented();
+}
+// End code for unit testing implementation of NativeFarCall class
+
+//-------------------------------------------------------------------
+
+
+void NativeMovConstReg::verify() {
+  NativeInstruction::verify();
+  // make sure code pattern is actually a "set_metadata" synthetic instruction
+  // see MacroAssembler::set_oop()
+  int i0 = long_at(sethi_offset);
+  int i1 = long_at(add_offset);   // read but not checked below
+
+  // verify the pattern "sethi %hi22(imm), reg ; add reg, %lo10(imm), reg"
+  Register rd = inv_rd(i0);
+  if (!is_op2(i0, Assembler::sethi_op2) && rd != G0 ) {   // NOTE(review): parses as (!sethi && rd != G0); was !(sethi && rd != G0) intended? confirm
+    fatal("not a set_metadata");
+  }
+}
+
+
+void NativeMovConstReg::print() {
+  tty->print_cr(INTPTR_FORMAT ": mov reg, " INTPTR_FORMAT, p2i(instruction_address()), data());
+}
+
+
+intptr_t NativeMovConstReg::data() const {
+  return data64(addr_at(sethi_offset), long_at(add_offset));
+}
+
+
+void NativeMovConstReg::set_data(intptr_t x) {
+  set_data64_sethi(addr_at(sethi_offset), x);
+  set_long_at(add_offset, set_data32_simm13( long_at(add_offset), x));
+
+  // also store the value into an oop_Relocation cell, if any
+  CodeBlob* cb = CodeCache::find_blob(instruction_address());
+  nmethod* nm = cb ? cb->as_nmethod_or_null() : NULL;
+  if (nm != NULL) {
+    RelocIterator iter(nm, instruction_address(), next_instruction_address());   // relocations covering just this instruction
+    oop* oop_addr = NULL;
+    Metadata** metadata_addr = NULL;
+    while (iter.next()) {
+      if (iter.type() == relocInfo::oop_type) {
+        oop_Relocation *r = iter.oop_reloc();
+        if (oop_addr == NULL) {
+          oop_addr = r->oop_addr();
+          *oop_addr = cast_to_oop(x);
+        } else {
+          assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
+        }
+      }
+      if (iter.type() == relocInfo::metadata_type) {
+        metadata_Relocation *r = iter.metadata_reloc();
+        if (metadata_addr == NULL) {
+          metadata_addr = r->metadata_addr();
+          *metadata_addr = (Metadata*)x;
+        } else {
+          assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here");
+        }
+      }
+    }
+  }
+}
+
+
+// Code for unit testing implementation of NativeMovConstReg class
+void NativeMovConstReg::test() {
+#ifdef ASSERT
+  ResourceMark rm;
+  CodeBuffer cb("test", 100, 100);
+  MacroAssembler* a = new MacroAssembler(&cb);
+  NativeMovConstReg* nm;
+  uint idx;
+  int offsets[] = {   // values round-tripped through set_data()/data()
+    0x0,
+    (int)0x7fffffff,
+    (int)0x80000000,
+    (int)0xffffffff,
+    0x20,
+    4096,
+    4097,
+  };
+
+  VM_Version::allow_all();
+
+  AddressLiteral al1(0xaaaabbbb, relocInfo::external_word_type);
+  a->sethi(al1, I3);
+  a->add(I3, al1.low10(), I3);
+  AddressLiteral al2(0xccccdddd, relocInfo::external_word_type);
+  a->sethi(al2, O2);
+  a->add(O2, al2.low10(), O2);
+
+  nm = nativeMovConstReg_at( cb.insts_begin() );
+  nm->print();
+
+  nm = nativeMovConstReg_at( nm->next_instruction_address() );
+  for (idx = 0; idx < ARRAY_SIZE(offsets); idx++) {
+    nm->set_data( offsets[idx] );
+    assert(nm->data() == offsets[idx], "check unit test");
+  }
+  nm->print();
+
+  VM_Version::revert();
+#endif
+}
+// End code for unit testing implementation of NativeMovConstReg class
+
+//-------------------------------------------------------------------
+
+void NativeMovConstReg32::verify() {
+  NativeInstruction::verify();
+  // make sure code pattern is actually a "set_metadata" synthetic instruction
+  // see MacroAssembler::set_oop()
+  int i0 = long_at(sethi_offset);
+  int i1 = long_at(add_offset);   // read but not checked below
+
+  // verify the pattern "sethi %hi22(imm), reg ; add reg, %lo10(imm), reg"
+  Register rd = inv_rd(i0);
+  if (!is_op2(i0, Assembler::sethi_op2) && rd != G0 ) {   // NOTE(review): same condition as NativeMovConstReg::verify; confirm intended negation
+    fatal("not a set_metadata");
+  }
+}
+
+
+void NativeMovConstReg32::print() {
+  tty->print_cr(INTPTR_FORMAT ": mov reg, " INTPTR_FORMAT, p2i(instruction_address()), data());
+}
+
+
+intptr_t NativeMovConstReg32::data() const {
+  return data32(long_at(sethi_offset), long_at(add_offset));
+}
+
+
+void NativeMovConstReg32::set_data(intptr_t x) {
+  set_long_at(sethi_offset, set_data32_sethi( long_at(sethi_offset), x));
+  set_long_at(add_offset, set_data32_simm13( long_at(add_offset), x));
+
+  // also store the value into an oop_Relocation cell, if any
+  CodeBlob* cb = CodeCache::find_blob(instruction_address());
+  nmethod* nm = cb ? cb->as_nmethod_or_null() : NULL;
+  if (nm != NULL) {
+    RelocIterator iter(nm, instruction_address(), next_instruction_address());
+    oop* oop_addr = NULL;
+    Metadata** metadata_addr = NULL;
+    while (iter.next()) {
+      if (iter.type() == relocInfo::oop_type) {
+        oop_Relocation *r = iter.oop_reloc();
+        if (oop_addr == NULL) {
+          oop_addr = r->oop_addr();
+          *oop_addr = cast_to_oop(x);
+        } else {
+          assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
+        }
+      }
+      if (iter.type() == relocInfo::metadata_type) {
+        metadata_Relocation *r = iter.metadata_reloc();
+        if (metadata_addr == NULL) {
+          metadata_addr = r->metadata_addr();
+          *metadata_addr = (Metadata*)x;
+        } else {
+          assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here");
+        }
+      }
+    }
+  }
+}
+
+//-------------------------------------------------------------------
+
+void NativeMovConstRegPatching::verify() {
+  NativeInstruction::verify();
+  // Make sure code pattern is sethi/nop/add.
+  int i0 = long_at(sethi_offset);
+  int i1 = long_at(nop_offset);
+  int i2 = long_at(add_offset);
+  assert((int)nop_offset == (int)NativeMovConstReg::add_offset, "sethi size ok");
+
+  // Verify the pattern "sethi %hi22(imm), reg; nop; add reg, %lo10(imm), reg"
+  // The casual reader should note that on Sparc a nop is a special case of sethi
+  // in which the destination register is %g0.
+  Register rd0 = inv_rd(i0);
+  Register rd1 = inv_rd(i1);
+  if (!(is_op2(i0, Assembler::sethi_op2) && rd0 != G0 &&
+        is_op2(i1, Assembler::sethi_op2) && rd1 == G0 && // nop is a special case of sethi
+        is_op3(i2, Assembler::add_op3, Assembler::arith_op) &&
+        inv_immed(i2) && (unsigned)get_simm13(i2) < (1 << 10) &&
+        rd0 == inv_rs1(i2) && rd0 == inv_rd(i2))) {
+    fatal("not a set_metadata");
+  }
+}
+
+
+void NativeMovConstRegPatching::print() {
+  tty->print_cr(INTPTR_FORMAT ": mov reg, 0x%x", p2i(instruction_address()), data());
+}
+
+
+int NativeMovConstRegPatching::data() const {
+  return data64(addr_at(sethi_offset), long_at(add_offset));   // result is returned as int
+}
+
+
+void NativeMovConstRegPatching::set_data(int x) {
+  set_data64_sethi(addr_at(sethi_offset), x);
+  set_long_at(add_offset, set_data32_simm13(long_at(add_offset), x));
+
+  // also store the value into an oop_Relocation cell, if any
+  CodeBlob* cb = CodeCache::find_blob(instruction_address());
+  nmethod* nm = cb ? cb->as_nmethod_or_null() : NULL;
+  if (nm != NULL) {
+    RelocIterator iter(nm, instruction_address(), next_instruction_address());
+    oop* oop_addr = NULL;
+    Metadata** metadata_addr = NULL;
+    while (iter.next()) {
+      if (iter.type() == relocInfo::oop_type) {
+        oop_Relocation *r = iter.oop_reloc();
+        if (oop_addr == NULL) {
+          oop_addr = r->oop_addr();
+          *oop_addr = cast_to_oop(x);
+        } else {
+          assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
+        }
+      }
+      if (iter.type() == relocInfo::metadata_type) {
+        metadata_Relocation *r = iter.metadata_reloc();
+        if (metadata_addr == NULL) {
+          metadata_addr = r->metadata_addr();
+          *metadata_addr = (Metadata*)x;
+        } else {
+          assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here");
+        }
+      }
+    }
+  }
+}
+
+
+// Code for unit testing implementation of NativeMovConstRegPatching class
+void NativeMovConstRegPatching::test() {
+#ifdef ASSERT
+  ResourceMark rm;
+  CodeBuffer cb("test", 100, 100);
+  MacroAssembler* a = new MacroAssembler(&cb);
+  NativeMovConstRegPatching* nm;
+  uint idx;
+  int offsets[] = {
+    0x0,
+    (int)0x7fffffff,
+    (int)0x80000000,
+    (int)0xffffffff,
+    0x20,
+    4096,
+    4097,
+  };
+
+  VM_Version::allow_all();
+
+  AddressLiteral al1(0xaaaabbbb, relocInfo::external_word_type);
+  a->sethi(al1, I3);
+  a->nop();
+  a->add(I3, al1.low10(), I3);
+  AddressLiteral al2(0xccccdddd, relocInfo::external_word_type);
+  a->sethi(al2, O2);
+  a->nop();
+  a->add(O2, al2.low10(), O2);
+
+  nm = nativeMovConstRegPatching_at( cb.insts_begin() );
+  nm->print();
+
+  nm = nativeMovConstRegPatching_at( nm->next_instruction_address() );
+  for (idx = 0; idx < ARRAY_SIZE(offsets); idx++) {
+    nm->set_data( offsets[idx] );
+    assert(nm->data() == offsets[idx], "check unit test");
+  }
+  nm->print();
+
+  VM_Version::revert();
+#endif // ASSERT
+}
+// End code for unit testing implementation of NativeMovConstRegPatching class
+
+
+//-------------------------------------------------------------------
+
+
+void NativeMovRegMem::verify() {
+  NativeInstruction::verify();
+  // make sure code pattern is actually a "ld" or "st" of some sort.
+  int i0 = long_at(0);
+  int op3 = inv_op3(i0);
+
+  assert((int)add_offset == NativeMovConstReg::add_offset, "sethi size ok");
+
+  if (!(is_op(i0, Assembler::ldst_op) &&
+        inv_immed(i0) &&
+        0 != (op3 < op3_ldst_int_limit
+              ? (1 << op3 ) & (op3_mask_ld | op3_mask_st)
+              : (1 << (op3 - op3_ldst_int_limit)) & (op3_mask_ldf | op3_mask_stf))))
+  {
+    int i1 = long_at(ldst_offset);   // not an immediate-form ld/st: inspect the instruction at ldst_offset instead
+    Register rd = inv_rd(i0);
+
+    op3 = inv_op3(i1);
+    if (!is_op(i1, Assembler::ldst_op) && rd == inv_rs2(i1) &&   // NOTE(review): '!' binds only to is_op(); the outer test negates the whole conjunction - confirm
+        0 != (op3 < op3_ldst_int_limit
+              ? (1 << op3 ) & (op3_mask_ld | op3_mask_st)
+              : (1 << (op3 - op3_ldst_int_limit)) & (op3_mask_ldf | op3_mask_stf))) {
+      fatal("not a ld* or st* op");
+    }
+  }
+}
+
+
+void NativeMovRegMem::print() {
+  if (is_immediate()) {
+    // offset is a signed 13-bit immediate, so casting it to int will not lose significant bits
+    tty->print_cr(INTPTR_FORMAT ": mov reg, [reg + %d]", p2i(instruction_address()), (int)offset());
+  } else {
+    tty->print_cr(INTPTR_FORMAT ": mov reg, [reg + reg]", p2i(instruction_address()));
+  }
+}
+
+
+// Code for unit testing implementation of NativeMovRegMem class
+void NativeMovRegMem::test() {
+#ifdef ASSERT
+  ResourceMark rm;
+  CodeBuffer cb("test", 1000, 1000);
+  MacroAssembler* a = new MacroAssembler(&cb);
+  NativeMovRegMem* nm;
+  uint idx = 0;   // incremented once per load/store emitted below
+  uint idx1;
+  int offsets[] = {
+    0x0,
+    (int)0xffffffff,
+    (int)0x7fffffff,
+    (int)0x80000000,
+    4096,
+    4097,
+    0x20,
+    0x4000,
+  };
+
+  VM_Version::allow_all();
+
+  AddressLiteral al1(0xffffffff, relocInfo::external_word_type);
+  AddressLiteral al2(0xaaaabbbb, relocInfo::external_word_type);
+  a->ldsw( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->ldsw( G5, I3, G4 ); idx++;
+  a->ldsb( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->ldsb( G5, I3, G4 ); idx++;
+  a->ldsh( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->ldsh( G5, I3, G4 ); idx++;
+  a->lduw( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->lduw( G5, I3, G4 ); idx++;
+  a->ldub( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->ldub( G5, I3, G4 ); idx++;
+  a->lduh( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->lduh( G5, I3, G4 ); idx++;
+  a->ldx( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->ldx( G5, I3, G4 ); idx++;
+  a->ldd( G5, al1.low10(), G4 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->ldd( G5, I3, G4 ); idx++;
+  a->ldf( FloatRegisterImpl::D, O2, -1, F14 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->ldf( FloatRegisterImpl::S, O0, I3, F15 ); idx++;
+
+  a->stw( G5, G4, al1.low10() ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->stw( G5, G4, I3 ); idx++;
+  a->stb( G5, G4, al1.low10() ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->stb( G5, G4, I3 ); idx++;
+  a->sth( G5, G4, al1.low10() ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->sth( G5, G4, I3 ); idx++;
+  a->stx( G5, G4, al1.low10() ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->stx( G5, G4, I3 ); idx++;
+  a->std( G5, G4, al1.low10() ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->std( G5, G4, I3 ); idx++;
+  a->stf( FloatRegisterImpl::S, F18, O2, -1 ); idx++;
+  a->sethi(al2, I3); a->add(I3, al2.low10(), I3);
+  a->stf( FloatRegisterImpl::S, F15, O0, I3 ); idx++;
+
+  nm = nativeMovRegMem_at( cb.insts_begin() );
+  nm->print();
+  nm->set_offset( low10(0) );
+  nm->print();
+  nm->add_offset_in_bytes( low10(0xbb) * wordSize );
+  nm->print();
+
+  while (--idx) {   // the first instruction was handled above; visit the remaining idx-1
+    nm = nativeMovRegMem_at( nm->next_instruction_address() );
+    nm->print();
+    for (idx1 = 0; idx1 < ARRAY_SIZE(offsets); idx1++) {
+      nm->set_offset( nm->is_immediate() ? low10(offsets[idx1]) : offsets[idx1] );
+      assert(nm->offset() == (nm->is_immediate() ? low10(offsets[idx1]) : offsets[idx1]),
+             "check unit test");
+      nm->print();
+    }
+    nm->add_offset_in_bytes( low10(0xbb) * wordSize );
+    nm->print();
+  }
+
+  VM_Version::revert();
+#endif // ASSERT
+}
+
+// End code for unit testing implementation of NativeMovRegMem class
+
+
+//--------------------------------------------------------------------------------
+
+
+void NativeJump::verify() {
+  NativeInstruction::verify();
+  int i0 = long_at(sethi_offset);
+  int i1 = long_at(jmpl_offset);   // read but not checked below
+  assert((int)jmpl_offset == (int)NativeMovConstReg::add_offset, "sethi size ok");
+  // verify the pattern "sethi %hi22(imm), treg ; jmpl treg, %lo10(imm), lreg"
+  Register rd = inv_rd(i0);
+  // In LP64, the jump instruction location varies for non relocatable
+  // jumps, for example it could be sethi, xor, jmp instead of the
+  // 7 instructions for sethi. So let's check sethi only.
+  if (!is_op2(i0, Assembler::sethi_op2) && rd != G0 ) {   // NOTE(review): fatal only when (!sethi && rd != G0); confirm intended negation
+    fatal("not a jump_to instruction");
+  }
+}
+
+
+void NativeJump::print() {
+  tty->print_cr(INTPTR_FORMAT ": jmpl reg, " INTPTR_FORMAT, p2i(instruction_address()), p2i(jump_destination()));
+}
+
+
+// Code for unit testing implementation of NativeJump class
+void NativeJump::test() {
+#ifdef ASSERT
+  ResourceMark rm;
+  CodeBuffer cb("test", 100, 100);
+  MacroAssembler* a = new MacroAssembler(&cb);
+  NativeJump* nj;
+  uint idx;
+  int offsets[] = {   // byte offsets from the jump's own address used as destinations
+    0x0,
+    (int)0xffffffff,
+    (int)0x7fffffff,
+    (int)0x80000000,
+    4096,
+    4097,
+    0x20,
+    0x4000,
+  };
+
+  VM_Version::allow_all();
+
+  AddressLiteral al(0x7fffbbbb, relocInfo::external_word_type);
+  a->sethi(al, I3);
+  a->jmpl(I3, al.low10(), G0, RelocationHolder::none);
+  a->delayed()->nop();
+  a->sethi(al, I3);
+  a->jmpl(I3, al.low10(), L3, RelocationHolder::none);
+  a->delayed()->nop();
+
+  nj = nativeJump_at( cb.insts_begin() );
+  nj->print();
+
+  nj = nativeJump_at( nj->next_instruction_address() );
+  for (idx = 0; idx < ARRAY_SIZE(offsets); idx++) {
+    nj->set_jump_destination( nj->instruction_address() + offsets[idx] );
+    assert(nj->jump_destination() == (nj->instruction_address() + offsets[idx]), "check unit test");
+    nj->print();
+  }
+
+  VM_Version::revert();
+#endif // ASSERT
+}
+// End code for unit testing implementation of NativeJump class
+
+
+void NativeJump::insert(address code_pos, address entry) {
+  Unimplemented();
+}
+
+// MT safe inserting of a jump over an unknown instruction sequence (used by nmethod::makeZombie)
+// The problem: jump_to is a 3-word instruction (including its delay slot).
+// Atomic write can be only with 1 word.
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {   // entry and dest are unused in this body
+  // Here's one way to do it: Pre-allocate a three-word jump sequence somewhere
+  // in the header of the nmethod, within a short branch's span of the patch point.
+  // Set up the jump sequence using NativeJump::insert, and then use an annulled
+  // unconditional branch at the target site (an atomic 1-word update).
+  // Limitations: You can only patch nmethods, with any given nmethod patched at
+  // most once, and the patch must be in the nmethod's header.
+  // It's messy, but you can ask the CodeCache for the nmethod containing the
+  // target address.
+
+  // %%%%% For now, do something MT-stupid:
+  ResourceMark rm;
+  int code_size = 1 * BytesPerInstWord;
+  CodeBuffer cb(verified_entry, code_size + 1);
+  MacroAssembler* a = new MacroAssembler(&cb);
+  a->ldsw(G0, 0, O7); // "ld" must agree with code in the signal handler
+  ICache::invalidate_range(verified_entry, code_size);
+}
+
+
+void NativeIllegalInstruction::insert(address code_pos) {
+  NativeIllegalInstruction* nii = (NativeIllegalInstruction*) nativeInstruction_at(code_pos);
+  nii->set_long_at(0, illegal_instruction());
+}
+
+static int illegal_instruction_bits = 0;   // 0 means "not yet generated"
+
+int NativeInstruction::illegal_instruction() {
+  if (illegal_instruction_bits == 0) {
+    ResourceMark rm;
+    char buf[40];
+    CodeBuffer cbuf((address)&buf[0], 20);
+    MacroAssembler* a = new MacroAssembler(&cbuf);
+    address ia = a->pc();
+    a->trap(ST_RESERVED_FOR_USER_0 + 1);
+    int bits = *(int*)ia;   // encoding of the trap just assembled
+    assert(is_op3(bits, Assembler::trap_op3, Assembler::arith_op), "bad instruction");
+    illegal_instruction_bits = bits;
+    assert(illegal_instruction_bits != 0, "oops");
+  }
+  return illegal_instruction_bits;
+}
+
+static int ic_miss_trap_bits = 0;   // 0 means "not yet generated"
+
+bool NativeInstruction::is_ic_miss_trap() {
+  if (ic_miss_trap_bits == 0) {
+    ResourceMark rm;
+    char buf[40];
+    CodeBuffer cbuf((address)&buf[0], 20);
+    MacroAssembler* a = new MacroAssembler(&cbuf);
+    address ia = a->pc();
+    a->trap(Assembler::notEqual, Assembler::ptr_cc, G0, ST_RESERVED_FOR_USER_0 + 2);
+    int bits = *(int*)ia;
+    assert(is_op3(bits, Assembler::trap_op3, Assembler::arith_op), "bad instruction");
+    ic_miss_trap_bits = bits;
+    assert(ic_miss_trap_bits != 0, "oops");
+  }
+  return long_at(0) == ic_miss_trap_bits;
+}
+
+
+bool NativeInstruction::is_illegal() {
+  if (illegal_instruction_bits == 0) {   // pattern never generated, so nothing can match it
+    return false;
+  }
+  return long_at(0) == illegal_instruction_bits;
+}
+
+
+void NativeGeneralJump::verify() {
+  assert(((NativeInstruction *)this)->is_jump() ||
+         ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction");
+}
+
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {  // writes a single "branch always" word at code_pos
+  Assembler::Condition condition = Assembler::always;
+  int x = Assembler::op2(Assembler::br_op2) | Assembler::annul(false) |
+    Assembler::cond(condition) | Assembler::wdisp((intptr_t)entry, (intptr_t)code_pos, 22);  // 22-bit word displacement from code_pos to entry
+  NativeGeneralJump* ni = (NativeGeneralJump*) nativeInstruction_at(code_pos);
+  ni->set_long_at(0, x);
+}
+
+
+// MT-safe patching of a jmp instruction (and following word).
+// First patches the second word, and then atomically replaces
+// the first word with the first new instruction word.
+// Other processors might briefly see the old first word
+// followed by the new second word.  This is OK if the old
+// second word is harmless, and the new second word may be
+// harmlessly executed in the delay slot of the call.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+  assert(Patching_lock->is_locked() ||
+         SafepointSynchronize::is_at_safepoint(), "concurrent code patching");
+  assert (instr_addr != NULL, "illegal address for code patching");
+  NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); // checking that it is a general jump (verify() runs in ASSERT builds)
+  assert(NativeGeneralJump::instruction_size == 8, "wrong instruction size; must be 8");
+  int i0 = ((int*)code_buffer)[0];  // new first word
+  int i1 = ((int*)code_buffer)[1];  // new second word
+  int* contention_addr = (int*) h_jump->addr_at(1*BytesPerInstWord);  // second word of the existing jump
+  assert(inv_op(*contention_addr) == Assembler::arith_op ||
+         *contention_addr == nop_instruction(),
+         "must not interfere with original call");
+  // The set_long_at calls do the ICacheInvalidate so we just need to do them in reverse order
+  h_jump->set_long_at(1*BytesPerInstWord, i1);
+  h_jump->set_long_at(0*BytesPerInstWord, i0);
+  // NOTE:  It is possible that another thread T will execute
+  // only the second patched word.
+  // In other words, since the original instruction is this
+  //    jmp patching_stub; nop                    (NativeGeneralJump)
+  // and the new sequence from the buffer is this:
+  //    sethi %hi(K), %r; add %r, %lo(K), %r      (NativeMovConstReg)
+  // what T will execute is this:
+  //    jmp patching_stub; add %r, %lo(K), %r
+  // thereby putting garbage into %r before calling the patching stub.
+  // This is OK, because the patching stub ignores the value of %r.
+
+  // Make sure the first-patched instruction, which may co-exist
+  // briefly with the call, will do something harmless.
+  assert(inv_op(*contention_addr) == Assembler::arith_op ||
+         *contention_addr == nop_instruction(),
+         "must not interfere with original call");
+}
diff -ur --new-file a/src/hotspot/cpu/sparc/nativeInst_sparc.hpp b/src/hotspot/cpu/sparc/nativeInst_sparc.hpp
--- a/src/hotspot/cpu/sparc/nativeInst_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/nativeInst_sparc.hpp 2023-04-16 11:42:11.069365084 +0000
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_NATIVEINST_SPARC_HPP
+#define CPU_SPARC_NATIVEINST_SPARC_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+// We have interface for the following instructions:
+// - NativeInstruction
+// - - NativeCall
+// - - NativeFarCall
+// - - NativeMovConstReg
+// - - NativeMovConstRegPatching
+// - - NativeMovRegMem
+// - - NativeJump
+// - - NativeGeneralJump
+// - - NativeIllegalInstruction
+// The base class for different kinds of native instruction abstractions.
+// Provides the primitive operations to manipulate code relative to this.
+class NativeInstruction {
+  friend class Relocation;
+
+ public:
+  enum Sparc_specific_constants {
+    nop_instruction_size        =    4
+  };
+
+  bool is_nop()                        { return long_at(0) == nop_instruction(); }
+  bool is_call()                       { return is_op(long_at(0), Assembler::call_op); }
+  bool is_call_reg()                   { return is_op(long_at(0), Assembler::arith_op); }
+  bool is_sethi()                      { return (is_op2(long_at(0), Assembler::sethi_op2)
+                                          && inv_rd(long_at(0)) != G0); }
+
+  bool sets_cc() {
+    // conservative (returns true for some instructions that do not set
+    // the condition code, such as "save").
+    // Does not return true for the deprecated tagged instructions, such as, TADDcc
+    int x = long_at(0);
+    return (is_op(x, Assembler::arith_op) &&
+            (inv_op3(x) & Assembler::cc_bit_op3) == Assembler::cc_bit_op3);
+  }
+  bool is_illegal();
+  bool is_zombie() {
+    int x = long_at(0);
+    return (is_op3(x, Assembler::ldsw_op3, Assembler::ldst_op) &&
+            inv_rs1(x) == G0 && inv_rd(x) == O7);
+  }
+  bool is_ic_miss_trap();       // Inline-cache uses a trap to detect a miss
+  bool is_return() {
+    // is it the output of MacroAssembler::ret or MacroAssembler::retl?
+    int x = long_at(0);
+    const int pc_return_offset = 8; // see frame_sparc.hpp
+    return is_op3(x, Assembler::jmpl_op3, Assembler::arith_op)
+        && (inv_rs1(x) == I7 || inv_rs1(x) == O7)
+        && inv_immed(x) && inv_simm(x, 13) == pc_return_offset
+        && inv_rd(x) == G0;
+  }
+  bool is_int_jump() {
+    // is it the output of MacroAssembler::b?
+    int x = long_at(0);
+    return is_op2(x, Assembler::bp_op2) || is_op2(x, Assembler::br_op2);
+  }
+  bool is_float_jump() {
+    // is it the output of MacroAssembler::fb?
+    int x = long_at(0);
+    return is_op2(x, Assembler::fbp_op2) || is_op2(x, Assembler::fb_op2);
+  }
+  bool is_jump() {
+    return is_int_jump() || is_float_jump();
+  }
+  bool is_cond_jump() {
+    int x = long_at(0);
+    return (is_int_jump() && Assembler::inv_cond(x) != Assembler::always) ||
+           (is_float_jump() && Assembler::inv_cond(x) != Assembler::f_always);
+  }
+
+  bool is_stack_bang() {
+    int x = long_at(0);
+    return is_op3(x, Assembler::stw_op3, Assembler::ldst_op) &&
+           (inv_rd(x) == G0) && (inv_rs1(x) == SP) && (inv_rs2(x) == G3_scratch);
+  }
+
+  bool is_prefetch() {
+    int x = long_at(0);
+    return is_op3(x, Assembler::prefetch_op3, Assembler::ldst_op);
+  }
+
+  bool is_membar() {
+    int x = long_at(0);
+    return is_op3(x, Assembler::membar_op3, Assembler::arith_op) &&
+           (inv_rd(x) == G0) && (inv_rs1(x) == O7);
+  }
+
+  bool is_safepoint_poll() {
+    int x = long_at(0);
+    return is_op3(x, Assembler::ldx_op3, Assembler::ldst_op) &&
+           (inv_rd(x) == G0) && (inv_immed(x) ? Assembler::inv_simm13(x) == 0 : inv_rs2(x) == G0);
+  }
+
+  bool is_zero_test(Register &reg);
+  bool is_load_store_with_small_offset(Register reg);
+
+ public:
+  static int nop_instruction()         { return Assembler::op(Assembler::branch_op) | Assembler::op2(Assembler::sethi_op2); }
+  static int illegal_instruction();    // the output of __ breakpoint_trap()
+  static int call_instruction(address destination, address pc) { return Assembler::op(Assembler::call_op) | Assembler::wdisp((intptr_t)destination, (intptr_t)pc, 30); }
+
+protected:
+  address  addr_at(int offset) const    { return address(this) + offset; }
+  int      long_at(int offset) const    { return *(int*)addr_at(offset); }
+  void set_long_at(int offset, int i);      /* deals with I-cache */
+  void set_jlong_at(int offset, jlong i);   /* deals with I-cache */
+  void set_addr_at(int offset, address x);  /* deals with I-cache */
+
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(BytesPerInstWord); }
+
+  static bool is_op( int x, Assembler::ops opval)  {
+    return Assembler::inv_op(x) == opval;
+  }
+  static bool is_op2(int x, Assembler::op2s op2val) {
+    return Assembler::inv_op(x) == Assembler::branch_op && Assembler::inv_op2(x) == op2val;
+  }
+  static bool is_op3(int x, Assembler::op3s op3val, Assembler::ops opval) {
+    return Assembler::inv_op(x) == opval && Assembler::inv_op3(x) == op3val;
+  }
+
+  // utilities to help subclasses decode:
+  static Register inv_rd(  int x ) { return Assembler::inv_rd( x); }
+  static Register inv_rs1( int x ) { return Assembler::inv_rs1(x); }
+  static Register inv_rs2( int x ) { return Assembler::inv_rs2(x); }
+
+  static bool inv_immed( int x ) { return Assembler::inv_immed(x); }
+  static bool inv_annul( int x ) { return (Assembler::annul(true) & x) != 0; }
+  static int  inv_cond(  int x ) { return Assembler::inv_cond(x); }
+
+  static int inv_op(  int x ) { return Assembler::inv_op( x); }
+  static int inv_op2( int x ) { return Assembler::inv_op2(x); }
+  static int inv_op3( int x ) { return Assembler::inv_op3(x); }
+
+  static int inv_simm(    int x, int nbits ) { return Assembler::inv_simm(x, nbits); }
+  static intptr_t inv_wdisp(   int x, int nbits ) { return Assembler::inv_wdisp(  x, 0, nbits); }
+  static intptr_t inv_wdisp16( int x )            { return Assembler::inv_wdisp16(x, 0); }
+  static int branch_destination_offset(int x) { return MacroAssembler::branch_destination(x, 0); }
+  static int patch_branch_destination_offset(int dest_offset, int x) {
+    return MacroAssembler::patched_branch(dest_offset, x, 0);
+  }
+
+  // utility for checking if x is either of 2 small constants
+  static bool is_either(int x, int k1, int k2) {
+    // return x == k1 || x == k2;
+    return (1 << x) & (1 << k1 | 1 << k2);
+  }
+
+  // utility for checking overflow of signed instruction fields
+  static bool fits_in_simm(int x, int nbits) {
+    // cf. Assembler::assert_signed_range()
+    // return -(1 << nbits-1) <= x  &&  x < ( 1 << nbits-1),
+    return (unsigned)(x + (1 << nbits-1)) < (unsigned)(1 << nbits);
+  }
+
+  // set a signed immediate field
+  static int set_simm(int insn, int imm, int nbits) {
+    return (insn &~ Assembler::simm(-1, nbits)) | Assembler::simm(imm, nbits);
+  }
+
+  // set a wdisp field (disp should be the difference of two addresses)
+  static int set_wdisp(int insn, intptr_t disp, int nbits) {
+    return (insn &~ Assembler::wdisp((intptr_t)-4, (intptr_t)0, nbits)) | Assembler::wdisp(disp, 0, nbits);
+  }
+
+  static int set_wdisp16(int insn, intptr_t disp) {
+    return (insn &~ Assembler::wdisp16((intptr_t)-4, 0)) | Assembler::wdisp16(disp, 0);
+  }
+
+  // get a simm13 field from an arithmetic or memory instruction
+  static int get_simm13(int insn) {
+    assert(is_either(Assembler::inv_op(insn),
+                     Assembler::arith_op, Assembler::ldst_op) &&
+            (insn & Assembler::immed(true)), "must have a simm13 field");
+    return Assembler::inv_simm(insn, 13);
+  }
+
+  // set the simm13 field of an arithmetic or memory instruction
+  static int set_simm13(int insn, int imm) {  // returns the patched instruction word (was 'bool', which collapsed it to 0/1)
+    get_simm13(insn);           // tickle the assertion check
+    return set_simm(insn, imm, 13);
+  }
+
+  // combine the fields of a sethi stream (7 instructions ) and an add, jmp or ld/st
+  static intptr_t data64( address pc, int arith_insn ) {
+    assert(is_op2(*(unsigned int *)pc, Assembler::sethi_op2), "must be sethi");
+    intptr_t hi = (intptr_t)gethi( (unsigned int *)pc );
+    intptr_t lo = (intptr_t)get_simm13(arith_insn);
+    assert((unsigned)lo < (1 << 10), "offset field of set_metadata must be 10 bits");
+    return hi | lo;
+  }
+
+  // Regenerate the instruction sequence that performs the 64 bit
+  // sethi.  This only does the sethi.  The disp field (bottom 10 bits)
+  // must be handled separately.
+  static void set_data64_sethi(address instaddr, intptr_t x);
+  static void verify_data64_sethi(address instaddr, intptr_t x);
+
+  // combine the fields of a sethi/simm13 pair (simm13 = or, add, jmpl, ld/st)
+  static int data32(int sethi_insn, int arith_insn) {
+    assert(is_op2(sethi_insn, Assembler::sethi_op2), "must be sethi");
+    int hi = Assembler::inv_hi22(sethi_insn);
+    int lo = get_simm13(arith_insn);
+    assert((unsigned)lo < (1 << 10), "offset field of set_metadata must be 10 bits");
+    return hi | lo;
+  }
+
+  static int set_data32_sethi(int sethi_insn, int imm) {
+    // note that Assembler::hi22 clips the low 10 bits for us
+    assert(is_op2(sethi_insn, Assembler::sethi_op2), "must be sethi");
+    return (sethi_insn &~ Assembler::hi22(-1)) | Assembler::hi22(imm);
+  }
+
+  static int set_data32_simm13(int arith_insn, int imm) {
+    get_simm13(arith_insn);             // tickle the assertion check
+    int imm10 = Assembler::low10(imm);
+    return (arith_insn &~ Assembler::simm(-1, 13)) | Assembler::simm(imm10, 13);
+  }
+
+  static int low10(int imm) {
+    return Assembler::low10(imm);
+  }
+
+  // Perform the inverse of the LP64 Macroassembler::sethi
+  // routine.  Extracts the 54 bits of address from the instruction
+  // stream. This routine must agree with the sethi routine in
+  // assembler_inline_sparc.hpp
+  static address gethi( unsigned int *pc ) {
+    int i = 0;
+    uintptr_t adr;
+    // We first start out with the real sethi instruction
+    assert(is_op2(*pc, Assembler::sethi_op2), "in gethi - must be sethi");
+    adr = (unsigned int)Assembler::inv_hi22( *(pc++) );
+    i++;
+    while ( i < 7 ) {
+       // We're done if we hit a nop
+       if ( (int)*pc == nop_instruction() ) break;
+       assert ( Assembler::inv_op(*pc) == Assembler::arith_op, "in gethi - must be arith_op" );
+       switch  ( Assembler::inv_op3(*pc) ) {
+         case Assembler::xor_op3:
+           adr ^= (intptr_t)get_simm13( *pc );
+           return ( (address)adr );
+           break;
+         case Assembler::sll_op3:
+           adr <<= ( *pc & 0x3f );
+           break;
+         case Assembler::or_op3:
+           adr |= (intptr_t)get_simm13( *pc );
+           break;
+         default:
+           assert ( 0, "in gethi - Should not reach here" );
+           break;
+       }
+       pc++;
+       i++;
+    }
+    return ( (address)adr );
+  }
+
+ public:
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void test() {}                 // override for testing
+
+  inline friend NativeInstruction* nativeInstruction_at(address address);
+};
+
+inline NativeInstruction* nativeInstruction_at(address address) {
+    NativeInstruction* inst = (NativeInstruction*)address;
+#ifdef ASSERT
+      inst->verify();
+#endif
+    return inst;
+}
+
+
+
+//-----------------------------------------------------------------------------
+
+// The NativeCall is an abstraction for accessing/manipulating native call imm32 instructions.
+// (used to manipulate inline caches, primitive & dll calls, etc.)
+class NativeCall;
+
+inline NativeCall* nativeCall_at(address instr);
+inline NativeCall* nativeCall_overwriting_at(address instr,
+                                             address destination = NULL);
+inline NativeCall* nativeCall_before(address return_address);
+class NativeCall: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    instruction_size                   = 8,  // bytes: the call word plus the following word (see nativeCall_overwriting_at)
+    return_address_offset              = 8,
+    call_displacement_width            = 30,
+    displacement_offset                = 0,
+    instruction_offset                 = 0
+  };
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+  address return_address() const            { return addr_at(return_address_offset); }
+
+  address destination() const               { return inv_wdisp(long_at(0), call_displacement_width) + instruction_address(); }
+  address displacement_address() const      { return addr_at(displacement_offset); }
+  void  set_destination(address dest)       { set_long_at(0, set_wdisp(long_at(0), dest - instruction_address(), call_displacement_width)); }
+  void  set_destination_mt_safe(address dest);
+
+  void  verify_alignment() {} // do nothing on sparc
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void  test();
+
+  // Creation
+  friend inline NativeCall* nativeCall_at(address instr);
+  friend NativeCall* nativeCall_overwriting_at(address instr, address destination) {
+    // insert a "blank" call:
+    NativeCall* call = (NativeCall*)instr;
+    call->set_long_at(0 * BytesPerInstWord, call_instruction(destination, instr));
+    call->set_long_at(1 * BytesPerInstWord, nop_instruction());
+    assert(call->addr_at(2 * BytesPerInstWord) - instr == instruction_size, "instruction size");
+    // check its structure now:
+    assert(nativeCall_at(instr)->destination() == destination, "correct call destination");
+    return call;
+  }
+
+  friend inline NativeCall* nativeCall_before(address return_address) {
+    NativeCall* call = (NativeCall*)(return_address - return_address_offset);
+    #ifdef ASSERT
+      call->verify();
+    #endif
+    return call;
+  }
+
+  static bool is_call_at(address instr) {
+    return nativeInstruction_at(instr)->is_call();
+  }
+
+  static bool is_call_before(address instr) {
+    return nativeInstruction_at(instr - return_address_offset)->is_call();
+  }
+
+  static bool is_call_to(address instr, address target) {
+    return nativeInstruction_at(instr)->is_call() &&
+      nativeCall_at(instr)->destination() == target;
+  }
+
+  // Insertion of a fresh call + nop at code_pos; the MT-safe patching entry point is replace_mt_safe below.
+  static void insert(address code_pos, address entry) {
+    (void)nativeCall_overwriting_at(code_pos, entry);
+  }
+
+  static void  replace_mt_safe(address instr_addr, address code_buffer);
+};
+inline NativeCall* nativeCall_at(address instr) {
+  NativeCall* call = (NativeCall*)instr;
+#ifdef ASSERT
+  call->verify();
+#endif
+  return call;
+}
+
+class NativeCallReg: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    instruction_size      = 8,
+    return_address_offset = 8,
+    instruction_offset    = 0
+  };
+
+  address next_instruction_address() const {
+    return addr_at(instruction_size);
+  }
+};
+
+// The NativeFarCall is an abstraction for accessing/manipulating native call-anywhere
+// instructions in the sparcv9 vm.  Used to call native methods which may be loaded
+// anywhere in the address space, possibly out of reach of a call instruction.
+
+// The format of this extended-range call is:
+//      jumpl_to addr, lreg
+//      == sethi %hi54(addr), O7 ;  jumpl O7, %lo10(addr), O7 ;  <delay>
+// That is, it is essentially the same as a NativeJump.
+class NativeFarCall;
+inline NativeFarCall* nativeFarCall_overwriting_at(address instr, address destination = NULL);
+inline NativeFarCall* nativeFarCall_at(address instr);
+class NativeFarCall: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    // instruction_size includes the delay slot instruction.
+    instruction_size                   = 9 * BytesPerInstWord,
+    return_address_offset              = 9 * BytesPerInstWord,
+    jmpl_offset                        = 7 * BytesPerInstWord,  // the jmpl follows 7 words reserved for the sethi stream
+    displacement_offset                = 0,
+    instruction_offset                 = 0
+  };
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+  address return_address() const            { return addr_at(return_address_offset); }
+
+  address destination() const {
+    return (address) data64(addr_at(0), long_at(jmpl_offset));  // sethi stream bits combined with the jmpl's simm13
+  }
+  address displacement_address() const      { return addr_at(displacement_offset); }
+  void set_destination(address dest);
+
+  bool destination_is_compiled_verified_entry_point();
+
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void  test();
+
+  // Creation
+  friend inline NativeFarCall* nativeFarCall_at(address instr) {
+    NativeFarCall* call = (NativeFarCall*)instr;
+    #ifdef ASSERT
+      call->verify();
+    #endif
+    return call;
+  }
+
+  friend inline NativeFarCall* nativeFarCall_overwriting_at(address instr, address destination) {
+    Unimplemented();  // nothing is written at instr; insert() below therefore also ends up here
+    NativeFarCall* call = (NativeFarCall*)instr;
+    return call;
+  }
+
+  friend NativeFarCall* nativeFarCall_before(address return_address) {
+    NativeFarCall* call = (NativeFarCall*)(return_address - return_address_offset);
+    #ifdef ASSERT
+      call->verify();
+    #endif
+    return call;
+  }
+
+  static bool is_call_at(address instr);
+
+  // MT-safe patching of a call instruction.
+  static void insert(address code_pos, address entry) {
+    (void)nativeFarCall_overwriting_at(code_pos, entry);
+  }
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+};
+
+
+// An interface for accessing/manipulating 32 bit native set_metadata imm, reg instructions
+// (used to manipulate inlined data references, etc.)
+//      set_metadata imm, reg
+//      == sethi %hi22(imm), reg ;  add reg, %lo10(imm), reg
+class NativeMovConstReg32;
+inline NativeMovConstReg32* nativeMovConstReg32_at(address address);
+class NativeMovConstReg32: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    sethi_offset           = 0,
+    add_offset             = 4,  // byte offset: the add directly follows the single sethi word
+    instruction_size       = 8
+  };
+
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+
+  // (The [set_]data accessor respects oop_type relocs also.)
+  intptr_t data() const;
+  void set_data(intptr_t x);
+
+  // report the destination register
+  Register destination() { return inv_rd(long_at(sethi_offset)); }
+
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void test();
+
+  // Creation
+  friend inline NativeMovConstReg32* nativeMovConstReg32_at(address address) {
+    NativeMovConstReg32* test = (NativeMovConstReg32*)address;
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+};
+
+// An interface for accessing/manipulating native set_metadata imm, reg instructions.
+// (used to manipulate inlined data references, etc.)
+//      set_metadata imm, reg
+//      == sethi stream for imm (7 instruction words, see add_offset), reg ;  add reg, %lo10(imm), reg
+class NativeMovConstReg;
+inline NativeMovConstReg* nativeMovConstReg_at(address address);
+class NativeMovConstReg: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    sethi_offset           = 0,
+    add_offset             = 7 * BytesPerInstWord,
+    instruction_size       = 8 * BytesPerInstWord
+  };
+
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+
+  // (The [set_]data accessor respects oop_type relocs also.)
+  intptr_t data() const;
+  void set_data(intptr_t x);
+
+  // report the destination register
+  Register destination() { return inv_rd(long_at(sethi_offset)); }
+
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void test();
+
+  // Creation
+  friend inline NativeMovConstReg* nativeMovConstReg_at(address address) {
+    NativeMovConstReg* test = (NativeMovConstReg*)address;
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+
+
+  friend NativeMovConstReg* nativeMovConstReg_before(address address) {
+    NativeMovConstReg* test = (NativeMovConstReg*)(address - instruction_size);
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+
+};
+
+
+// An interface for accessing/manipulating native set_metadata imm, reg instructions.
+// (used to manipulate inlined data references, etc.)
+//      set_metadata imm, reg
+//      == sethi stream for imm (7 instruction words), reg; nop; add reg, %lo10(imm), reg
+//
+// Note that it is identical to NativeMovConstReg with the exception of a nop between the
+// sethi and the add.  The nop is required to be in the delay slot of the call instruction
+// which overwrites the sethi during patching.
+class NativeMovConstRegPatching;
+inline NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address);
+class NativeMovConstRegPatching: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    sethi_offset           = 0,
+    nop_offset             = 7 * BytesPerInstWord,
+    add_offset             = nop_offset   + BytesPerInstWord,
+    instruction_size       = add_offset   + BytesPerInstWord
+  };
+
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+
+  // (The [set_]data accessor respects oop_type relocs also.)
+  int data() const;
+  void  set_data(int x);
+
+  // report the destination register
+  Register destination() { return inv_rd(long_at(sethi_offset)); }
+
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void test();
+
+  // Creation
+  friend inline NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) {
+    NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)address;
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+
+
+  friend NativeMovConstRegPatching* nativeMovConstRegPatching_before(address address) {
+    NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_size);
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+
+};
+
+
+// An interface for accessing/manipulating native memory ops
+//      ld* [reg + offset], reg
+//      st* reg, [reg + offset]
+//      sethi %hi(imm), reg; add reg, %lo(imm), reg; ld* [reg1 + reg], reg2
+//      sethi %hi(imm), reg; add reg, %lo(imm), reg; st* reg2, [reg1 + reg]
+// Ops covered: {lds,ldu,st}{w,b,h}, {ld,st}{d,x}
+//
+class NativeMovRegMem;
+inline NativeMovRegMem* nativeMovRegMem_at (address address);
+class NativeMovRegMem: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    op3_mask_ld    = 1 << Assembler::lduw_op3 |
+                     1 << Assembler::ldub_op3 |
+                     1 << Assembler::lduh_op3 |
+                     1 << Assembler::ldd_op3 |
+                     1 << Assembler::ldsw_op3 |
+                     1 << Assembler::ldsb_op3 |
+                     1 << Assembler::ldsh_op3 |
+                     1 << Assembler::ldx_op3,
+    op3_mask_st    = 1 << Assembler::stw_op3 |
+                     1 << Assembler::stb_op3 |
+                     1 << Assembler::sth_op3 |
+                     1 << Assembler::std_op3 |
+                     1 << Assembler::stx_op3,
+    op3_ldst_int_limit = Assembler::ldf_op3,
+    op3_mask_ldf   = 1 << (Assembler::ldf_op3  - op3_ldst_int_limit) |
+                     1 << (Assembler::lddf_op3 - op3_ldst_int_limit),
+    op3_mask_stf   = 1 << (Assembler::stf_op3  - op3_ldst_int_limit) |
+                     1 << (Assembler::stdf_op3 - op3_ldst_int_limit),
+
+    offset_width    = 13,
+    sethi_offset    = 0,
+    add_offset      = 7 * BytesPerInstWord,
+    ldst_offset     = add_offset + BytesPerInstWord
+  };
+  bool is_immediate() const {
+    // check if instruction is ld* [reg + offset], reg or st* reg, [reg + offset]
+    int i0 = long_at(0);
+    return (is_op(i0, Assembler::ldst_op));
+  }
+
+  address instruction_address() const           { return addr_at(0); }
+
+  int num_bytes_to_end_of_patch() const {
+    return is_immediate()? BytesPerInstWord :
+                           NativeMovConstReg::instruction_size;
+  }
+
+  intptr_t   offset() const                             {
+     return is_immediate()? inv_simm(long_at(0), offset_width) :
+                            nativeMovConstReg_at(addr_at(0))->data();
+  }
+  void  set_offset(intptr_t x) {
+    if (is_immediate()) {
+      guarantee(fits_in_simm(x, offset_width), "data block offset overflow");  // NOTE(review): x is narrowed to int here and in set_simm() - confirm offsets always fit in 32 bits
+      set_long_at(0, set_simm(long_at(0), x, offset_width));
+    } else
+      nativeMovConstReg_at(addr_at(0))->set_data(x);
+  }
+
+  void  add_offset_in_bytes(intptr_t radd_offset)     {
+      set_offset (offset() + radd_offset);
+  }
+
+  void  verify();
+  void  print ();
+
+  // unit test stuff
+  static void test();
+
+ private:
+  friend inline NativeMovRegMem* nativeMovRegMem_at (address address) {
+    NativeMovRegMem* test = (NativeMovRegMem*)address;
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+};
+
+
+// An interface for accessing/manipulating native jumps
+//      jump_to addr
+//      == sethi stream for addr (7 instruction words, see jmpl_offset), temp ;  jumpl reg, %lo10(addr), G0 ;  <delay>
+//      jumpl_to addr, lreg
+//      == sethi stream for addr (7 instruction words, see jmpl_offset), temp ;  jumpl reg, %lo10(addr), lreg ;  <delay>
+class NativeJump;
+inline NativeJump* nativeJump_at(address address);
+class NativeJump: public NativeInstruction {
+ private:
+  void guarantee_displacement(int disp, int width) {
+    guarantee(fits_in_simm(disp, width + 2), "branch displacement overflow");
+  }
+
+ public:
+  enum Sparc_specific_constants {
+    sethi_offset           = 0,
+    jmpl_offset            = 7 * BytesPerInstWord,
+    instruction_size       = 9 * BytesPerInstWord  // includes delay slot
+  };
+
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+
+  address jump_destination() const {
+    return (address) data64(instruction_address(), long_at(jmpl_offset));
+  }
+  void set_jump_destination(address dest) {
+    set_data64_sethi( instruction_address(), (intptr_t)dest);  // rewrite the sethi stream first
+    set_long_at(jmpl_offset,  set_data32_simm13( long_at(jmpl_offset),  (intptr_t)dest));  // then the low 10 bits in the jmpl
+  }
+
+  // Creation
+  friend inline NativeJump* nativeJump_at(address address) {
+    NativeJump* jump = (NativeJump*)address;
+    #ifdef ASSERT
+      jump->verify();
+    #endif
+    return jump;
+  }
+
+  void verify();
+  void print();
+
+  // Unit testing stuff
+  static void test();
+
+  // Insertion of native jump instruction
+  static void insert(address code_pos, address entry);
+  // MT-safe insertion of native jump at verified method entry
+  static void check_verified_entry_alignment(address entry, address verified_entry) {
+    // nothing to do for sparc.
+  }
+  static void patch_verified_entry(address entry, address verified_entry, address dest);
+};
+
+
+
+// Despite the name, handles only simple branches.
+class NativeGeneralJump;
+inline NativeGeneralJump* nativeGeneralJump_at(address address);
+class NativeGeneralJump: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    instruction_size                   = 8  // bytes: the branch word plus the word at offset 4 (its delay slot)
+  };
+
+  address instruction_address() const       { return addr_at(0); }
+  address jump_destination()    const       { return addr_at(0) + branch_destination_offset(long_at(0)); }
+  void set_jump_destination(address dest) {
+    int patched_instr = patch_branch_destination_offset(dest - addr_at(0), long_at(0));
+    set_long_at(0, patched_instr);
+  }
+  NativeInstruction *delay_slot_instr() { return nativeInstruction_at(addr_at(4));}
+  void fill_delay_slot(int instr) { set_long_at(4, instr);}
+  Assembler::Condition condition() {
+    int x = long_at(0);
+    return (Assembler::Condition) Assembler::inv_cond(x);
+  }
+
+  // Creation
+  friend inline NativeGeneralJump* nativeGeneralJump_at(address address) {
+    NativeGeneralJump* jump = (NativeGeneralJump*)(address);
+#ifdef ASSERT
+      jump->verify();
+#endif
+    return jump;
+  }
+
+  // Insertion of native general jump instruction
+  static void insert_unconditional(address code_pos, address entry);
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+
+  void verify();
+};
+
+
+class NativeIllegalInstruction: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    instruction_size            =    4
+  };
+
+  // Insert illegal opcode at specific address
+  static void insert(address code_pos);
+};
+
+#endif // CPU_SPARC_NATIVEINST_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/registerMap_sparc.hpp b/src/hotspot/cpu/sparc/registerMap_sparc.hpp
--- a/src/hotspot/cpu/sparc/registerMap_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/registerMap_sparc.hpp 2023-04-16 11:42:11.069480970 +0000
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_REGISTERMAP_SPARC_HPP
+#define CPU_SPARC_REGISTERMAP_SPARC_HPP
+
+// machine-dependent implementation for register maps (members only, no class header here; presumably included inside class RegisterMap - confirm)
+  friend class frame;
+  friend class MethodHandles;
+
+ private:
+  intptr_t* _window;         // register window save area (for L and I regs)
+  intptr_t* _younger_window; // previous save area (for O regs, if needed)
+
+  address pd_location(VMReg reg) const;
+  address pd_location(VMReg base_reg, int slot_idx) const {
+    return location(base_reg->next(slot_idx));  // forwards to location() for the register slot_idx steps after base_reg
+  }
+  void pd_clear();
+  void pd_initialize_from(const RegisterMap* map) {
+    _window         = map->_window;
+    _younger_window = map->_younger_window;
+    _location_valid[0] = 0;  // avoid the shift_individual_registers game
+  }
+  void pd_initialize() {
+    _window = NULL;
+    _younger_window = NULL;
+    _location_valid[0] = 0;  // avoid the shift_individual_registers game
+  }
+  void shift_window(intptr_t* sp, intptr_t* younger_sp) {
+    _window         = sp;
+    _younger_window = younger_sp;
+    // Throw away locations for %i, %o, and %l registers:
+    // But do not throw away %g register locs.
+    if (_location_valid[0] != 0)  shift_individual_registers();  // skipped when word 0 of the valid mask is already clear
+  }
+  void shift_individual_registers();
+  // When popping out of compiled frames, we make all IRegs disappear.
+  void make_integer_regs_unsaved() { _location_valid[0] = 0; }
+
+#endif // CPU_SPARC_REGISTERMAP_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/register_definitions_sparc.cpp b/src/hotspot/cpu/sparc/register_definitions_sparc.cpp
--- a/src/hotspot/cpu/sparc/register_definitions_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/register_definitions_sparc.cpp 2023-04-16 11:42:11.069643918 +0000
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Note: precompiled headers can not be used in this file because of the above +// definition + +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "interp_masm_sparc.hpp" +#include "register_sparc.hpp" + +REGISTER_DEFINITION(Register, noreg); + +REGISTER_DEFINITION(Register, G0); +REGISTER_DEFINITION(Register, G1); +REGISTER_DEFINITION(Register, G2); +REGISTER_DEFINITION(Register, G3); +REGISTER_DEFINITION(Register, G4); +REGISTER_DEFINITION(Register, G5); +REGISTER_DEFINITION(Register, G6); +REGISTER_DEFINITION(Register, G7); + +REGISTER_DEFINITION(Register, O0); +REGISTER_DEFINITION(Register, O1); +REGISTER_DEFINITION(Register, O2); +REGISTER_DEFINITION(Register, O3); +REGISTER_DEFINITION(Register, O4); +REGISTER_DEFINITION(Register, O5); +REGISTER_DEFINITION(Register, O6); +REGISTER_DEFINITION(Register, O7); + +REGISTER_DEFINITION(Register, L0); +REGISTER_DEFINITION(Register, L1); +REGISTER_DEFINITION(Register, L2); +REGISTER_DEFINITION(Register, L3); +REGISTER_DEFINITION(Register, L4); +REGISTER_DEFINITION(Register, L5); 
+REGISTER_DEFINITION(Register, L6); +REGISTER_DEFINITION(Register, L7); + +REGISTER_DEFINITION(Register, I0); +REGISTER_DEFINITION(Register, I1); +REGISTER_DEFINITION(Register, I2); +REGISTER_DEFINITION(Register, I3); +REGISTER_DEFINITION(Register, I4); +REGISTER_DEFINITION(Register, I5); +REGISTER_DEFINITION(Register, I6); +REGISTER_DEFINITION(Register, I7); + +REGISTER_DEFINITION(Register, FP); +REGISTER_DEFINITION(Register, SP); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, F0); +REGISTER_DEFINITION(FloatRegister, F1); +REGISTER_DEFINITION(FloatRegister, F2); +REGISTER_DEFINITION(FloatRegister, F3); +REGISTER_DEFINITION(FloatRegister, F4); +REGISTER_DEFINITION(FloatRegister, F5); +REGISTER_DEFINITION(FloatRegister, F6); +REGISTER_DEFINITION(FloatRegister, F7); +REGISTER_DEFINITION(FloatRegister, F8); +REGISTER_DEFINITION(FloatRegister, F9); +REGISTER_DEFINITION(FloatRegister, F10); +REGISTER_DEFINITION(FloatRegister, F11); +REGISTER_DEFINITION(FloatRegister, F12); +REGISTER_DEFINITION(FloatRegister, F13); +REGISTER_DEFINITION(FloatRegister, F14); +REGISTER_DEFINITION(FloatRegister, F15); +REGISTER_DEFINITION(FloatRegister, F16); +REGISTER_DEFINITION(FloatRegister, F17); +REGISTER_DEFINITION(FloatRegister, F18); +REGISTER_DEFINITION(FloatRegister, F19); +REGISTER_DEFINITION(FloatRegister, F20); +REGISTER_DEFINITION(FloatRegister, F21); +REGISTER_DEFINITION(FloatRegister, F22); +REGISTER_DEFINITION(FloatRegister, F23); +REGISTER_DEFINITION(FloatRegister, F24); +REGISTER_DEFINITION(FloatRegister, F25); +REGISTER_DEFINITION(FloatRegister, F26); +REGISTER_DEFINITION(FloatRegister, F27); +REGISTER_DEFINITION(FloatRegister, F28); +REGISTER_DEFINITION(FloatRegister, F29); +REGISTER_DEFINITION(FloatRegister, F30); +REGISTER_DEFINITION(FloatRegister, F31); +REGISTER_DEFINITION(FloatRegister, F32); +REGISTER_DEFINITION(FloatRegister, F34); +REGISTER_DEFINITION(FloatRegister, F36); +REGISTER_DEFINITION(FloatRegister, F38); 
+REGISTER_DEFINITION(FloatRegister, F40); +REGISTER_DEFINITION(FloatRegister, F42); +REGISTER_DEFINITION(FloatRegister, F44); +REGISTER_DEFINITION(FloatRegister, F46); +REGISTER_DEFINITION(FloatRegister, F48); +REGISTER_DEFINITION(FloatRegister, F50); +REGISTER_DEFINITION(FloatRegister, F52); +REGISTER_DEFINITION(FloatRegister, F54); +REGISTER_DEFINITION(FloatRegister, F56); +REGISTER_DEFINITION(FloatRegister, F58); +REGISTER_DEFINITION(FloatRegister, F60); +REGISTER_DEFINITION(FloatRegister, F62); + + +REGISTER_DEFINITION( Register, Otos_i); +REGISTER_DEFINITION( Register, Otos_l); +REGISTER_DEFINITION( Register, Otos_l1); +REGISTER_DEFINITION( Register, Otos_l2); +REGISTER_DEFINITION(FloatRegister, Ftos_f); +REGISTER_DEFINITION(FloatRegister, Ftos_d); +REGISTER_DEFINITION(FloatRegister, Ftos_d1); +REGISTER_DEFINITION(FloatRegister, Ftos_d2); + + +REGISTER_DEFINITION(Register, G2_thread); +REGISTER_DEFINITION(Register, G6_heapbase); +REGISTER_DEFINITION(Register, G5_method); +REGISTER_DEFINITION(Register, G5_megamorphic_method); +REGISTER_DEFINITION(Register, G5_inline_cache_reg); +REGISTER_DEFINITION(Register, Gargs); +REGISTER_DEFINITION(Register, L7_thread_cache); +REGISTER_DEFINITION(Register, Gframe_size); +REGISTER_DEFINITION(Register, G1_scratch); +REGISTER_DEFINITION(Register, G3_scratch); +REGISTER_DEFINITION(Register, G4_scratch); +REGISTER_DEFINITION(Register, Gtemp); +REGISTER_DEFINITION(Register, Lentry_args); + +// JSR 292 +REGISTER_DEFINITION(Register, G5_method_type); +REGISTER_DEFINITION(Register, G3_method_handle); +REGISTER_DEFINITION(Register, L7_mh_SP_save); + +REGISTER_DEFINITION(Register, Lesp); +REGISTER_DEFINITION(Register, Lbcp); +REGISTER_DEFINITION(Register, Lmonitors); +REGISTER_DEFINITION(Register, Lbyte_code); +REGISTER_DEFINITION(Register, Llast_SP); +REGISTER_DEFINITION(Register, Lscratch); +REGISTER_DEFINITION(Register, Lscratch2); +REGISTER_DEFINITION(Register, LcpoolCache); +REGISTER_DEFINITION(Register, I5_savedSP); 
+REGISTER_DEFINITION(Register, O5_savedSP); +REGISTER_DEFINITION(Register, IdispatchAddress); +REGISTER_DEFINITION(Register, ImethodDataPtr); +REGISTER_DEFINITION(Register, Lmethod); +REGISTER_DEFINITION(Register, Llocals); +REGISTER_DEFINITION(Register, Oexception); +REGISTER_DEFINITION(Register, Oissuing_pc); diff -ur --new-file a/src/hotspot/cpu/sparc/register_sparc.cpp b/src/hotspot/cpu/sparc/register_sparc.cpp --- a/src/hotspot/cpu/sparc/register_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/register_sparc.cpp 2023-04-16 11:42:11.069757399 +0000 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "register_sparc.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + FloatRegisterImpl::number_of_registers; + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G7", + "O0", "O1", "O2", "O3", "O4", "O5", "SP", "O7", + "L0", "L1", "L2", "L3", "L4", "L5", "L6", "L7", + "I0", "I1", "I2", "I3", "I4", "I5", "FP", "I7" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", + "F10", "F11", "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19", + "F20", "F21", "F22", "F23", "F24", "F25", "F26", "F27", "F28", "F29", + "F30", "F31", "F32", "F33?", "F34", "F35?", "F36", "F37?", "F38", "F39?", + "F40", "F41?", "F42", "F43?", "F44", "F45?", "F46", "F47?", "F48", "F49?", + "F50", "F51?", "F52", "F53?", "F54", "F55?", "F56", "F57?", "F58", "F59?", + "F60", "F61?", "F62", "F63?" // one name per encoding: the table must hold all 64 entries so that names[63] is never NULL + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} diff -ur --new-file a/src/hotspot/cpu/sparc/register_sparc.hpp b/src/hotspot/cpu/sparc/register_sparc.hpp --- a/src/hotspot/cpu/sparc/register_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/register_sparc.hpp 2023-04-16 11:42:11.069978533 +0000 @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_REGISTER_SPARC_HPP +#define CPU_SPARC_REGISTER_SPARC_HPP + +#include "asm/register.hpp" + +// forward declaration +class Address; +class VMRegImpl; +typedef VMRegImpl* VMReg; + + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + + +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +// The implementation of integer registers for the SPARC architecture +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + log_set_size = 3, // the number of bits to encode the set register number + number_of_sets = 4, // the number of registers sets (in, local, out, global) + number_of_registers = number_of_sets << log_set_size, + + iset_no = 3, ibase = iset_no << log_set_size, // the in register set + lset_no = 2, lbase = lset_no << log_set_size, // the local register set + oset_no = 1, obase = oset_no << log_set_size, // the output register set + gset_no = 0, gbase = gset_no << log_set_size // the global register set + }; + + + friend Register as_Register(int encoding); + // set specific construction + friend Register as_iRegister(int number); + friend Register as_lRegister(int number); + friend Register as_oRegister(int number); + friend Register 
as_gRegister(int number); + + inline VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return value(); } + const char* name() const; + + // testers + bool is_valid() const { return (0 <= (value()&0x7F) && (value()&0x7F) < number_of_registers); } + bool is_even() const { return (encoding() & 1) == 0; } + bool is_in() const { return (encoding() >> log_set_size) == iset_no; } + bool is_local() const { return (encoding() >> log_set_size) == lset_no; } + bool is_out() const { return (encoding() >> log_set_size) == oset_no; } + bool is_global() const { return (encoding() >> log_set_size) == gset_no; } + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + int input_number() const { + assert(is_in(), "must be input register"); + return encoding() - ibase; + } + + Register after_save() const { + assert(is_out() || is_global(), "register not visible after save"); + return is_out() ? as_Register(encoding() + (ibase - obase)) : (const Register)this; + } + + Register after_restore() const { + assert(is_in() || is_global(), "register not visible after restore"); + return is_in() ? 
as_Register(encoding() + (obase - ibase)) : (const Register)this; + } + + int sp_offset_in_saved_window() const { + assert(is_in() || is_local(), "only i and l registers are saved in frame"); + return encoding() - lbase; + } + + inline Address address_in_saved_window() const; // implemented in assembler_sparc.hpp +}; + + +// set specific construction +inline Register as_iRegister(int number) { return as_Register(RegisterImpl::ibase + number); } +inline Register as_lRegister(int number) { return as_Register(RegisterImpl::lbase + number); } +inline Register as_oRegister(int number) { return as_Register(RegisterImpl::obase + number); } +inline Register as_gRegister(int number) { return as_Register(RegisterImpl::gbase + number); } + +// The integer registers of the SPARC architecture + +CONSTANT_REGISTER_DECLARATION(Register, noreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(Register, G0 , (RegisterImpl::gbase + 0)); +CONSTANT_REGISTER_DECLARATION(Register, G1 , (RegisterImpl::gbase + 1)); +CONSTANT_REGISTER_DECLARATION(Register, G2 , (RegisterImpl::gbase + 2)); +CONSTANT_REGISTER_DECLARATION(Register, G3 , (RegisterImpl::gbase + 3)); +CONSTANT_REGISTER_DECLARATION(Register, G4 , (RegisterImpl::gbase + 4)); +CONSTANT_REGISTER_DECLARATION(Register, G5 , (RegisterImpl::gbase + 5)); +CONSTANT_REGISTER_DECLARATION(Register, G6 , (RegisterImpl::gbase + 6)); +CONSTANT_REGISTER_DECLARATION(Register, G7 , (RegisterImpl::gbase + 7)); + +CONSTANT_REGISTER_DECLARATION(Register, O0 , (RegisterImpl::obase + 0)); +CONSTANT_REGISTER_DECLARATION(Register, O1 , (RegisterImpl::obase + 1)); +CONSTANT_REGISTER_DECLARATION(Register, O2 , (RegisterImpl::obase + 2)); +CONSTANT_REGISTER_DECLARATION(Register, O3 , (RegisterImpl::obase + 3)); +CONSTANT_REGISTER_DECLARATION(Register, O4 , (RegisterImpl::obase + 4)); +CONSTANT_REGISTER_DECLARATION(Register, O5 , (RegisterImpl::obase + 5)); +CONSTANT_REGISTER_DECLARATION(Register, O6 , (RegisterImpl::obase + 6)); 
+CONSTANT_REGISTER_DECLARATION(Register, O7 , (RegisterImpl::obase + 7)); + +CONSTANT_REGISTER_DECLARATION(Register, L0 , (RegisterImpl::lbase + 0)); +CONSTANT_REGISTER_DECLARATION(Register, L1 , (RegisterImpl::lbase + 1)); +CONSTANT_REGISTER_DECLARATION(Register, L2 , (RegisterImpl::lbase + 2)); +CONSTANT_REGISTER_DECLARATION(Register, L3 , (RegisterImpl::lbase + 3)); +CONSTANT_REGISTER_DECLARATION(Register, L4 , (RegisterImpl::lbase + 4)); +CONSTANT_REGISTER_DECLARATION(Register, L5 , (RegisterImpl::lbase + 5)); +CONSTANT_REGISTER_DECLARATION(Register, L6 , (RegisterImpl::lbase + 6)); +CONSTANT_REGISTER_DECLARATION(Register, L7 , (RegisterImpl::lbase + 7)); + +CONSTANT_REGISTER_DECLARATION(Register, I0 , (RegisterImpl::ibase + 0)); +CONSTANT_REGISTER_DECLARATION(Register, I1 , (RegisterImpl::ibase + 1)); +CONSTANT_REGISTER_DECLARATION(Register, I2 , (RegisterImpl::ibase + 2)); +CONSTANT_REGISTER_DECLARATION(Register, I3 , (RegisterImpl::ibase + 3)); +CONSTANT_REGISTER_DECLARATION(Register, I4 , (RegisterImpl::ibase + 4)); +CONSTANT_REGISTER_DECLARATION(Register, I5 , (RegisterImpl::ibase + 5)); +CONSTANT_REGISTER_DECLARATION(Register, I6 , (RegisterImpl::ibase + 6)); +CONSTANT_REGISTER_DECLARATION(Register, I7 , (RegisterImpl::ibase + 7)); + +CONSTANT_REGISTER_DECLARATION(Register, FP , (RegisterImpl::ibase + 6)); +CONSTANT_REGISTER_DECLARATION(Register, SP , (RegisterImpl::obase + 6)); + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + + +// construction +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t)encoding; +} + +// The implementation of float registers for the SPARC architecture + +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 64 + }; + + enum Width { + S = 1, D = 2, Q = 3 + }; + + // construction + inline VMReg as_VMReg( ); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); 
return value(); } + + public: + int encoding(Width w) const { + const int c = encoding(); + switch (w) { + case S: + assert(c < 32, "bad single float register"); + return c; + + case D: + assert(c < 64 && (c & 1) == 0, "bad double float register"); + return (c & 0x1e) | ((c & 0x20) >> 5); + + case Q: + assert(c < 64 && (c & 3) == 0, "bad quad float register"); + return (c & 0x1c) | ((c & 0x20) >> 5); + } + ShouldNotReachHere(); + return -1; + } + + bool is_valid() const { return 0 <= value() && value() < number_of_registers; } + bool is_even() const { return (encoding() & 1) == 0; } + + const char* name() const; + + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } +}; + + +// The float registers of the SPARC architecture + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, F0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F19 , (19)); 
+CONSTANT_REGISTER_DECLARATION(FloatRegister, F20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F31 , (31)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, F32 , (32)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F34 , (34)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F36 , (36)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F38 , (38)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F40 , (40)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F42 , (42)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F44 , (44)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F46 , (46)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F48 , (48)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F50 , (50)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F52 , (52)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F54 , (54)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F56 , (56)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F58 , (58)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F60 , (60)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, F62 , (62)); + +// Maximum number of incoming arguments that can be passed in i registers. +const int SPARC_ARGS_IN_REGS_NUM = 6; + +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // This number must be large enough to cover REG_COUNT (defined by c2) registers. 
+ // There is no requirement that any ordering here matches any ordering c2 gives + // its optoregs. + number_of_registers = 2*RegisterImpl::number_of_registers + + FloatRegisterImpl::number_of_registers + + 1 + // ccr + 4 // fcc + }; + static const int max_gpr; + static const int max_fpr; + +}; + +// Single, Double and Quad fp reg classes. These exist to map the ADLC +// encoding for a floating point register, to the FloatRegister number +// desired by the macroassembler. A FloatRegister is a number between +// 0 and 63 passed around as a pointer. For ADLC, an fp register encoding +// is the actual bit encoding used by the sparc hardware. When ADLC used +// the macroassembler to generate an instruction that references, e.g., a +// double fp reg, it passed the bit encoding to the macroassembler via +// as_FloatRegister, which, for double regs > 30, returns an illegal +// register number. +// +// Therefore we provide the following classes for use by ADLC. Their +// sole purpose is to convert from sparc register encodings to FloatRegisters. +// At some future time, we might replace FloatRegister with these classes, +// hence the definitions of as_xxxFloatRegister as class methods rather +// than as external inline routines.
+ +class SingleFloatRegisterImpl; +typedef SingleFloatRegisterImpl *SingleFloatRegister; + +inline FloatRegister as_SingleFloatRegister(int encoding); +class SingleFloatRegisterImpl { + public: + friend inline FloatRegister as_SingleFloatRegister(int encoding) { + assert(encoding < 32, "bad single float register encoding"); + return as_FloatRegister(encoding); + } +}; + + +class DoubleFloatRegisterImpl; +typedef DoubleFloatRegisterImpl *DoubleFloatRegister; + +inline FloatRegister as_DoubleFloatRegister(int encoding); +class DoubleFloatRegisterImpl { + public: + friend inline FloatRegister as_DoubleFloatRegister(int encoding) { + assert(encoding < 32, "bad double float register encoding"); + return as_FloatRegister( ((encoding & 1) << 5) | (encoding & 0x1e) ); + } +}; + + +class QuadFloatRegisterImpl; +typedef QuadFloatRegisterImpl *QuadFloatRegister; + +class QuadFloatRegisterImpl { + public: + friend FloatRegister as_QuadFloatRegister(int encoding) { + assert(encoding < 32 && ((encoding & 2) == 0), "bad quad float register encoding"); + return as_FloatRegister( ((encoding & 1) << 5) | (encoding & 0x1c) ); + } +}; + +#endif // CPU_SPARC_REGISTER_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/relocInfo_sparc.cpp b/src/hotspot/cpu/sparc/relocInfo_sparc.cpp --- a/src/hotspot/cpu/sparc/relocInfo_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/relocInfo_sparc.cpp 2023-04-16 11:42:11.070137925 +0000 @@ -0,0 +1,190 @@ +/* + * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/relocInfo.hpp" +#include "nativeInst_sparc.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/klass.inline.hpp" +#include "oops/oop.hpp" +#include "runtime/safepoint.hpp" + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + NativeInstruction* ip = nativeInstruction_at(addr()); + jint inst = ip->long_at(0); + assert(inst != NativeInstruction::illegal_instruction(), "no breakpoint"); + switch (Assembler::inv_op(inst)) { + + case Assembler::ldst_op: + #ifdef ASSERT + switch (Assembler::inv_op3(inst)) { + case Assembler::lduw_op3: + case Assembler::ldub_op3: + case Assembler::lduh_op3: + case Assembler::ldd_op3: + case Assembler::ldsw_op3: + case Assembler::ldsb_op3: + case Assembler::ldsh_op3: + case Assembler::ldx_op3: + case Assembler::ldf_op3: + case Assembler::lddf_op3: + case Assembler::stw_op3: + case Assembler::stb_op3: + case Assembler::sth_op3: + case Assembler::std_op3: + case Assembler::stx_op3: + case Assembler::stf_op3: + case Assembler::stdf_op3: + case Assembler::casa_op3: + case Assembler::casxa_op3: + break; + default: + ShouldNotReachHere(); + } + goto do_non_sethi; + #endif + + case 
Assembler::arith_op: + #ifdef ASSERT + switch (Assembler::inv_op3(inst)) { + case Assembler::or_op3: + case Assembler::add_op3: + case Assembler::jmpl_op3: + break; + default: + ShouldNotReachHere(); + } + do_non_sethi:; + #endif + { + guarantee(Assembler::inv_immed(inst), "must have a simm13 field"); + int simm13 = Assembler::low10((intptr_t)x) + o; + guarantee(Assembler::is_simm13(simm13), "offset can't overflow simm13"); + inst &= ~Assembler::simm( -1, 13); + inst |= Assembler::simm(simm13, 13); + if (verify_only) { + guarantee(ip->long_at(0) == inst, "instructions must match"); + } else { + ip->set_long_at(0, inst); + } + } + break; + + case Assembler::branch_op: + { + jint inst2; + guarantee(Assembler::inv_op2(inst)==Assembler::sethi_op2, "must be sethi"); + if (format() != 0) { + assert(type() == relocInfo::oop_type || type() == relocInfo::metadata_type, "only narrow oops or klasses case"); + jint np = type() == relocInfo::oop_type ? CompressedOops::narrow_oop_value(cast_to_oop(x)) : CompressedKlassPointers::encode((Klass*)x); + inst &= ~Assembler::hi22(-1); + inst |= Assembler::hi22((intptr_t)np); + if (verify_only) { + guarantee(ip->long_at(0) == inst, "instructions must match"); + } else { + ip->set_long_at(0, inst); + } + inst2 = ip->long_at( NativeInstruction::nop_instruction_size ); + guarantee(Assembler::inv_op(inst2)==Assembler::arith_op, "arith op"); + if (verify_only) { + guarantee(ip->long_at(NativeInstruction::nop_instruction_size) == NativeInstruction::set_data32_simm13( inst2, (intptr_t)np), + "instructions must match"); + } else { + ip->set_long_at(NativeInstruction::nop_instruction_size, NativeInstruction::set_data32_simm13( inst2, (intptr_t)np)); + } + break; + } + if (verify_only) { + ip->verify_data64_sethi( ip->addr_at(0), (intptr_t)x ); + } else { + ip->set_data64_sethi( ip->addr_at(0), (intptr_t)x ); + } + } + break; + + default: + guarantee(false, "instruction must perform arithmetic or memory access"); + } +} + + +address 
Relocation::pd_call_destination(address orig_addr) { + intptr_t adj = 0; + if (orig_addr != NULL) { + // We just moved this call instruction from orig_addr to addr(). + // This means its target will appear to have grown by addr() - orig_addr. + adj = -( addr() - orig_addr ); + } + if (NativeCall::is_call_at(addr())) { + NativeCall* call = nativeCall_at(addr()); + return call->destination() + adj; + } + if (NativeFarCall::is_call_at(addr())) { + NativeFarCall* call = nativeFarCall_at(addr()); + return call->destination() + adj; + } + // Special case: Patchable branch local to the code cache. + // This will break badly if the code cache grows larger than a few Mb. + NativeGeneralJump* br = nativeGeneralJump_at(addr()); + return br->jump_destination() + adj; +} + + +void Relocation::pd_set_call_destination(address x) { + if (NativeCall::is_call_at(addr())) { + NativeCall* call = nativeCall_at(addr()); + call->set_destination(x); + return; + } + if (NativeFarCall::is_call_at(addr())) { + NativeFarCall* call = nativeFarCall_at(addr()); + call->set_destination(x); + return; + } + // Special case: Patchable branch local to the code cache. + // This will break badly if the code cache grows larger than a few Mb. + NativeGeneralJump* br = nativeGeneralJump_at(addr()); + br->set_jump_destination(x); +} + + +address* Relocation::pd_address_in_code() { + // SPARC never embeds addresses in code, at present. + //assert(type() == relocInfo::oop_type, "only oops are inlined at present"); + return (address*)addr(); +} + + +address Relocation::pd_get_address_from_code() { + // SPARC never embeds addresses in code, at present. 
+ //assert(type() == relocInfo::oop_type, "only oops are inlined at present"); + return *(address*)addr(); +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff -ur --new-file a/src/hotspot/cpu/sparc/relocInfo_sparc.hpp b/src/hotspot/cpu/sparc/relocInfo_sparc.hpp --- a/src/hotspot/cpu/sparc/relocInfo_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/relocInfo_sparc.hpp 2023-04-16 11:42:11.070248451 +0000 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_RELOCINFO_SPARC_HPP +#define CPU_SPARC_RELOCINFO_SPARC_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since SPARC instructions are whole words, + // the two low-order offset bits can always be discarded. 
+ offset_unit = 4, + + // There is no need for format bits; the instructions are + // sufficiently self-identifying. + // The exception is narrow oops in a 64-bit VM. + format_width = 1 + }; + + public: + + // This platform has no oops in the code that are not also + // listed in the oop section. + static bool mustIterateImmediateOopsInCode() { return false; } + +//Reconciliation History +// 1.3 97/10/15 15:38:36 relocInfo_i486.hpp +// 1.4 97/12/08 16:01:06 relocInfo_i486.hpp +// 1.5 98/01/23 01:34:55 relocInfo_i486.hpp +// 1.6 98/02/27 15:44:53 relocInfo_i486.hpp +// 1.6 98/03/12 14:47:13 relocInfo_i486.hpp +// 1.8 99/06/22 16:37:50 relocInfo_i486.hpp +// 1.9 99/07/16 11:12:11 relocInfo_i486.hpp +//End + +#endif // CPU_SPARC_RELOCINFO_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/runtime_sparc.cpp b/src/hotspot/cpu/sparc/runtime_sparc.cpp --- a/src/hotspot/cpu/sparc/runtime_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/runtime_sparc.cpp 2023-04-16 11:42:11.070414071 +0000 @@ -0,0 +1,159 @@ +/* + * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.inline.hpp" +#include "code/vmreg.hpp" +#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_sparc.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_sparc.inline.hpp" +#endif + + +#define __ masm-> + +//------------------------------ generate_exception_blob --------------------------- +// creates exception blob at the end +// Using exception blob, this code is jumped from a compiled method. +// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers), unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jmp. +// +// Arguments: +// O0: exception oop +// O1: exception pc +// +// Results: +// O0: exception oop +// O1: exception pc in caller or ??? +// destination: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +void OptoRuntime::generate_exception_blob() { + // allocate space for code + ResourceMark rm; + int pad = VerifyThread ?
256 : 0;// Extra slop space for more verify code + + // setup code generation tools + // Measured 8/7/03 at 256 in 32bit debug build (no VerifyThread) + // Measured 8/7/03 at 528 in 32bit debug build (VerifyThread) + CodeBuffer buffer("exception_blob", 600+pad, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int framesize_in_bytes = __ total_frame_size_in_bytes(0); + int framesize_in_words = framesize_in_bytes / wordSize; + int framesize_in_slots = framesize_in_bytes / sizeof(jint); + + // NOTE(review): L is bound after the runtime call below, but no branch to it is + // visible in this function. + Label L; + + int start = __ offset(); + + __ verify_thread(); + __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset()); + __ st_ptr(Oissuing_pc, G2_thread, JavaThread::exception_pc_offset()); + + // This call does all the hard work. It checks if an exception catch + // exists in the method. + // If so, it returns the handler address. + // If the nmethod has been deoptimized and it had a handler, the handler + // address is the deopt blob unpack_with_exception entry. + // + // If no handler exists it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + // + __ save_frame(0); + + __ mov(G2_thread, O0); + __ set_last_Java_frame(SP, noreg); + __ save_thread(L7_thread_cache); + + // This call can block at exit and nmethod can be deoptimized at that + // point. If the nmethod had a catch point we would jump to the + // now deoptimized catch point and fall thru the vanilla deopt + // path and lose the exception + // Sure would be simpler if this call didn't block! + __ call(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C), relocInfo::runtime_call_type); + __ delayed()->mov(L7_thread_cache, O0); + + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. + // Callee-saved registers will be the same as the frame above (i.e., + // handle_exception_stub), since they were restored when we got the + // exception.
+ + OopMapSet *oop_maps = new OopMapSet(); + oop_maps->add_gc_map( __ offset()-start, new OopMap(framesize_in_slots, 0)); + + __ bind(L); + __ restore_thread(L7_thread_cache); + __ reset_last_Java_frame(); + + __ mov(O0, G3_scratch); // Move handler address to temp + __ restore(); + + // Restore SP from L7 if the exception PC is a MethodHandle call site. + __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), O7); + __ tst(O7); + __ movcc(Assembler::notZero, false, Assembler::icc, L7_mh_SP_save, SP); + + // G3_scratch contains handler address + // Since this may be the deopt blob we must set O7 to look like we returned + // from the original pc that threw the exception + + __ ld_ptr(G2_thread, JavaThread::exception_pc_offset(), O7); + __ sub(O7, frame::pc_return_offset, O7); + + + assert(Assembler::is_simm13(in_bytes(JavaThread::exception_oop_offset())), "exception offset overflows simm13, following ld instruction cannot be in delay slot"); + __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception); // O0 +#ifdef ASSERT + __ st_ptr(G0, G2_thread, JavaThread::exception_handler_pc_offset()); + __ st_ptr(G0, G2_thread, JavaThread::exception_pc_offset()); +#endif + __ JMP(G3_scratch, 0); + // Clear the exception oop so GC no longer processes it as a root. + __ delayed()->st_ptr(G0, G2_thread, JavaThread::exception_oop_offset()); + + // ------------- + // make sure all code is generated + masm->flush(); + + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize_in_words); +} diff -ur --new-file a/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp b/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp --- a/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp 2023-04-16 11:42:11.071495347 +0000 @@ -0,0 +1,3112 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/gcLocker.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/align.hpp" +#include "vmreg_sparc.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif +#if INCLUDE_JVMCI +#include "jvmci/jvmciJavaClasses.hpp" +#endif + +#define __ masm-> + + +class RegisterSaver { + + // Used for saving volatile 
registers. This is Gregs, Fregs, I/L/O. + // The Oregs are problematic. In the 32bit build the compiler can + // have O registers live with 64 bit quantities. A window save will + // cut the heads off of the registers. We have to do a very extensive + // stack dance to save and restore these properly. + + // Note that the Oregs problem only exists if we block at either a polling + // page exception a compiled code safepoint that was not originally a call + // or deoptimize following one of these kinds of safepoints. + + // Lots of registers to save. For all builds, a window save will preserve + // the %i and %l registers. For the 32-bit longs-in-two entries and 64-bit + // builds a window-save will preserve the %o registers. In the LION build + // we need to save the 64-bit %o registers which requires we save them + // before the window-save (as then they become %i registers and get their + // heads chopped off on interrupt). We have to save some %g registers here + // as well. + enum { + // This frame's save area. Includes extra space for the native call: + // vararg's layout space and the like. Briefly holds the caller's + // register save area. + call_args_area = frame::register_save_words_sp_offset + + frame::memory_parameter_word_sp_offset*wordSize, + // Make sure save locations are always 8 byte aligned. 
+ // can't use align_up because it doesn't produce compile time constant + start_of_extra_save_area = ((call_args_area + 7) & ~7), + g1_offset = start_of_extra_save_area, // g-regs needing saving + g3_offset = g1_offset+8, + g4_offset = g3_offset+8, + g5_offset = g4_offset+8, + o0_offset = g5_offset+8, + o1_offset = o0_offset+8, + o2_offset = o1_offset+8, + o3_offset = o2_offset+8, + o4_offset = o3_offset+8, + o5_offset = o4_offset+8, + start_of_flags_save_area = o5_offset+8, + ccr_offset = start_of_flags_save_area, + fsr_offset = ccr_offset + 8, + d00_offset = fsr_offset+8, // Start of float save area + register_save_size = d00_offset+8*32 + }; + + + public: + + static int Oexception_offset() { return o0_offset; }; + static int G3_offset() { return g3_offset; }; + static int G5_offset() { return g5_offset; }; + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + static void restore_live_registers(MacroAssembler* masm); + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. + + static void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { + // Record volatile registers as callee-save values in an OopMap so their save locations will be + // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for + // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers + // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame + // (as the stub's I's) when the runtime routine called by the stub creates its frame. + int i; + // Always make the frame size 16 byte aligned. 
+ int frame_size = align_up(additional_frame_words + register_save_size, 16); + // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words + int frame_size_in_slots = frame_size / sizeof(jint); + // CodeBlob frame size is in words. + *total_frame_words = frame_size / wordSize; + // OopMap* map = new OopMap(*total_frame_words, 0); + OopMap* map = new OopMap(frame_size_in_slots, 0); + + __ save(SP, -frame_size, SP); + + + int debug_offset = 0; + // Save the G's + __ stx(G1, SP, g1_offset+STACK_BIAS); + map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg()); + + __ stx(G3, SP, g3_offset+STACK_BIAS); + map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg()); + + __ stx(G4, SP, g4_offset+STACK_BIAS); + map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg()); + + __ stx(G5, SP, g5_offset+STACK_BIAS); + map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg()); + + // This is really a waste but we'll keep things as they were for now + if (true) { + } + + + // Save the flags + __ rdccr( G5 ); + __ stx(G5, SP, ccr_offset+STACK_BIAS); + __ stxfsr(SP, fsr_offset+STACK_BIAS); + + // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles) + int offset = d00_offset; + for( int i=0; iset_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next()); + offset += sizeof(double); + } + + // And we're done. + + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved. +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + + // Restore all the FP registers + for( int i=0; i 8; +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. 
Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. +static int reg2offset(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// Describe a 64-bit value held in integer register r as a VMRegPair: when +// wordSize == 8 the pair is built with set2(r), otherwise with +// set_pair(r->successor(), r). +static VMRegPair reg64_to_VMRegPair(Register r) { + VMRegPair ret; + if (wordSize == 8) { + ret.set2(r->as_VMReg()); + } else { + ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); + } + return ret; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size) +// quantities. Values less than VMRegImpl::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the window +// top. VMRegImpl::stack0 refers to the first slot past the 16-word window, +// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Register +// values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit +// integer registers. Values 64-95 are the (32-bit only) float registers. +// Each 32-bit quantity is given its own number, so the integer registers +// (in either 32- or 64-bit builds) use 2 numbers. For example, there is +// an O0-low and an O0-high. Essentially, all int register numbers are doubled. + +// Register results are passed in O0-O5, for outgoing call arguments. To +// convert to incoming arguments, convert all O's to I's. The regs array +// refers to the low and high 32-bit words of 64-bit registers or stack slots. +// If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a +// 32-bit value was passed). If both are VMRegImpl::Bad(), it means no value was +// passed (used as a placeholder for the other half of longs and doubles in +// the 64-bit build).
regs[].second() is either VMRegImpl::Bad() or regs[].second() is +// regs[].first()+1 (regs[].first() may be misaligned in the C calling convention). +// Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first() +// == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the +// same VMRegPair. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + + +// --------------------------------------------------------------------------- +// The compiled Java calling convention. The Java convention always passes +// 64-bit values in adjacent aligned locations (either registers or stack), +// floats in float registers and doubles in aligned float pairs. There is +// no backing varargs store for values in registers. +// In the 32-bit build, longs are passed on the stack (cannot be +// passed in I's, because longs in I's get their heads chopped off at +// interrupt). +// +// sig_bt[0..total_args_passed) lists the argument types; a T_VOID entry is the +// other half of the preceding T_LONG/T_DOUBLE and is marked bad (unused). +// regs[i] receives the location chosen for argument i. A non-zero is_outgoing +// assigns integer-register arguments to O registers, zero assigns them to I +// registers. Returns the number of stack slots consumed by the arguments. +// NOTE(review): the code below places T_LONG in an integer register whenever one +// is free, so the 32-bit remark above looks historical - confirm. +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers"); + + const int int_reg_max = SPARC_ARGS_IN_REGS_NUM; + const int flt_reg_max = 8; + + int int_reg = 0; + int flt_reg = 0; + int slot = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_INT: + case T_SHORT: + case T_CHAR: + case T_BYTE: + case T_BOOLEAN: + if (int_reg < int_reg_max) { + Register r = is_outgoing ?
as_oRegister(int_reg++) : as_iRegister(int_reg++); + regs[i].set1(r->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(slot++)); + } + break; + + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting VOID in other half"); + // fall-through: a long is placed exactly like the pointer-sized types below + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address + if (int_reg < int_reg_max) { + Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); + regs[i].set2(r->as_VMReg()); + } else { + slot = align_up(slot, 2); // align to an even slot index + regs[i].set2(VMRegImpl::stack2reg(slot)); + slot += 2; + } + break; + + case T_FLOAT: + if (flt_reg < flt_reg_max) { + FloatRegister r = as_FloatRegister(flt_reg++); + regs[i].set1(r->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(slot++)); + } + break; + + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); + if (align_up(flt_reg, 2) + 1 < flt_reg_max) { + flt_reg = align_up(flt_reg, 2); // align to an even float register number + FloatRegister r = as_FloatRegister(flt_reg); + regs[i].set2(r->as_VMReg()); + flt_reg += 2; + } else { + slot = align_up(slot, 2); // align to an even slot index + regs[i].set2(VMRegImpl::stack2reg(slot)); + slot += 2; + } + break; + + case T_VOID: + regs[i].set_bad(); // Halves of longs & doubles + break; + + default: + fatal("unknown basic type %d", sig_bt[i]); + break; + } + } + + // Return the number of stack slots these arguments will need. + return slot; +} + +// Helper class mostly to avoid passing masm everywhere, and handle +// store displacement overflow logic.
+class AdapterGenerator { + MacroAssembler *masm; + Register Rdisp; + void set_Rdisp(Register r) { Rdisp = r; } + + void patch_callers_callsite(); + + // base+st_off points to top of argument; next_arg_offset() is one + // Interpreter::stackElementSize below it. + int arg_offset(const int st_off) { return st_off; } + int next_arg_offset(const int st_off) { + return st_off - Interpreter::stackElementSize; + } + + // Argument slot values may be loaded first into a register (Rdisp) because + // they might not fit into a simm13 displacement. + RegisterOrConstant arg_slot(const int st_off); + RegisterOrConstant next_arg_slot(const int st_off); + + // Store helpers used by gen_c2i_adapter: each writes one argument of the + // named kind at offset st_off relative to base. + void store_c2i_long(Register r, Register base, + const int st_off, bool is_stack); + void store_c2i_object(Register r, Register base, + const int st_off); + void store_c2i_int(Register r, Register base, + const int st_off); + void store_c2i_double(VMReg r_2, + VMReg r_1, Register base, const int st_off); + void store_c2i_float(FloatRegister f, Register base, + const int st_off); + + public: + void gen_c2i_adapter(int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup); + void gen_i2c_adapter(int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs); + + // Rdisp is not initialized here; the gen_*_adapter routines call set_Rdisp() + // before any slot helper reads it. + AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {} +}; + + +// Patch the caller's call site with the entry to compiled code if it exists.
+void AdapterGenerator::patch_callers_callsite() { + Label L; + __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch); + __ br_null(G3_scratch, false, Assembler::pt, L); + __ delayed()->nop(); + // Call into the VM to patch the caller, then jump to compiled callee + __ save_frame(4); // Args in compiled layout; do not blow them + + // Must save all the live Gregs the list is: + // G1: 1st Long arg (32bit build) + // G2: global allocated to TLS + // G3: used in inline cache check (scratch) + // G4: 2nd Long arg (32bit build); + // G5: used in inline cache check (Method*) + + // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops. + + // mov(s,d) + __ mov(G1, L1); + __ mov(G4, L4); + __ mov(G5_method, L5); + __ mov(G5_method, O0); // VM needs target method + __ mov(I7, O1); // VM needs caller's callsite + // Must be a leaf call... + // can be very far once the blob has been relocated + AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)); + __ relocate(relocInfo::runtime_call_type); + __ jumpl_to(dest, O7, O7); + __ delayed()->mov(G2_thread, L7_thread_cache); + __ mov(L7_thread_cache, G2_thread); + __ mov(L1, G1); + __ mov(L4, G4); + __ mov(L5, G5_method); + + __ restore(); // Restore args + __ bind(L); +} + + +RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) { + RegisterOrConstant roc(arg_offset(st_off)); + return __ ensure_simm13_or_reg(roc, Rdisp); +} + +RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) { + RegisterOrConstant roc(next_arg_offset(st_off)); + return __ ensure_simm13_or_reg(roc, Rdisp); +} + + +// Stores long into offset pointed to by base +void AdapterGenerator::store_c2i_long(Register r, Register base, + const int st_off, bool is_stack) { + // In V9, longs are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. 
+ __ stx(r, base, next_arg_slot(st_off)); +} + +void AdapterGenerator::store_c2i_object(Register r, Register base, + const int st_off) { + __ st_ptr (r, base, arg_slot(st_off)); +} + +void AdapterGenerator::store_c2i_int(Register r, Register base, + const int st_off) { + __ st (r, base, arg_slot(st_off)); +} + +// Stores into offset pointed to by base +void AdapterGenerator::store_c2i_double(VMReg r_2, + VMReg r_1, Register base, const int st_off) { + // In V9, doubles are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off)); +} + +void AdapterGenerator::store_c2i_float(FloatRegister f, Register base, + const int st_off) { + __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off)); +} + +void AdapterGenerator::gen_c2i_adapter( + int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs, + Label& L_skip_fixup) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + // However we will run interpreted if we come thru here. The next pass + // thru the call site will run compiled. If we ran compiled here then + // we can (theoretically) do endless i2c->c2i->i2c transitions during + // deopt/uncommon trap cycles. If we always go interpreted here then + // we can have at most one and don't need to play any tricks to keep + // from endlessly growing the stack. 
+ // + // Actually if we detected that we had an i2c->c2i transition here we + // ought to be able to reset the world back to the state of the interpreted + // call and not bother building another interpreter arg area. We don't + // do that at this point. + + patch_callers_callsite(); + + __ bind(L_skip_fixup); + + // Since all args are passed on the stack, total_args_passed*wordSize is the + // space we need. Add in varargs area needed by the interpreter. Round up + // to stack alignment. + const int arg_size = total_args_passed * Interpreter::stackElementSize; + const int varargs_area = + (frame::varargs_offset - frame::register_save_words)*wordSize; + const int extraspace = align_up(arg_size + varargs_area, 2*wordSize); + + const int bias = STACK_BIAS; + const int interp_arg_offset = frame::varargs_offset*wordSize + + (total_args_passed-1)*Interpreter::stackElementSize; + + const Register base = SP; + + // Make some extra space on the stack. + __ sub(SP, __ ensure_simm13_or_reg(extraspace, G3_scratch), SP); + set_Rdisp(G3_scratch); + + // Write the args into the outgoing interpreter space. 
+ for (int i = 0; i < total_args_passed; i++) { + const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias; + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { // Pretend stack targets are loaded into G1 + RegisterOrConstant ld_off = reg2offset(r_1) + extraspace + bias; + ld_off = __ ensure_simm13_or_reg(ld_off, Rdisp); + r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle + if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch); + else __ ldx(base, ld_off, G1_scratch); + } + + if (r_1->is_Register()) { + Register r = r_1->as_Register()->after_restore(); + if (is_reference_type(sig_bt[i])) { + store_c2i_object(r, base, st_off); + } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + store_c2i_long(r, base, st_off, r_2->is_stack()); + } else { + store_c2i_int(r, base, st_off); + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (sig_bt[i] == T_FLOAT) { + store_c2i_float(r_1->as_FloatRegister(), base, st_off); + } else { + assert(sig_bt[i] == T_DOUBLE, "wrong type"); + store_c2i_double(r_2, r_1, base, st_off); + } + } + } + + // Load the interpreter entry point. + __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); + + // Pass O5_savedSP as an argument to the interpreter. + // The interpreter will restore SP to this value before returning. + __ add(SP, __ ensure_simm13_or_reg(extraspace, G1), O5_savedSP); + + __ mov((frame::varargs_offset)*wordSize - + 1*Interpreter::stackElementSize+bias+BytesPerWord, G1); + // Jump to the interpreter just as if interpreter was doing it. + __ jmpl(G3_scratch, 0, G0); + // Setup Lesp for the call. Cannot actually set Lesp as the current Lesp + // (really L0) is in use by the compiled frame as a generic temp. However, + // the interpreter does not know where its args are without some kind of + // arg pointer being passed in. Pass it in Gargs. 
+ __ delayed()->add(SP, G1, Gargs); +} + +static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg, + address code_start, address code_end, + Label& L_ok) { + Label L_fail; + __ set(ExternalAddress(code_start), temp_reg); + __ set(pointer_delta(code_end, code_start, 1), temp2_reg); + __ cmp(pc_reg, temp_reg); + __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail); + __ delayed()->add(temp_reg, temp2_reg, temp_reg); + __ cmp(pc_reg, temp_reg); + __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok); + __ bind(L_fail); +} + +void AdapterGenerator::gen_i2c_adapter(int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs) { + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame + // layout. Lesp was saved by the calling I-frame and will be restored on + // return. Meanwhile, outgoing arg space is all owned by the callee + // C-frame, so we can mangle it at will. After adjusting the frame size, + // hoist register arguments and repack other args according to the compiled + // code convention. Finally, end in a jump to the compiled code. The entry + // point address is the start of the buffer. + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the x86 side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + // More detail: + // Adapters can be frameless because they do not require the caller + // to perform additional cleanup work, such as correcting the stack pointer. 
+ // An i2c adapter is frameless because the *caller* frame, which is interpreted, + // routinely repairs its own stack pointer (from interpreter_frame_last_sp), + // even if a callee has modified the stack pointer. + // A c2i adapter is frameless because the *callee* frame, which is interpreted, + // routinely repairs its caller's stack pointer (from sender_sp, which is set + // up via the senderSP register). + // In other words, if *either* the caller or callee is interpreted, we can + // get the stack pointer repaired after a call. + // This is why c2i and i2c adapters cannot be indefinitely composed. + // In particular, if a c2i adapter were to somehow call an i2c adapter, + // both caller and callee would be compiled methods, and neither would + // clean up the stack pointer changes performed by the two adapters. + // If this happens, control eventually transfers back to the compiled + // caller, but with an uncorrected stack, causing delayed havoc. + + if (VerifyAdapterCalls && + (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { + // So, let's test for cascading c2i/i2c adapters right now. 
+ // assert(Interpreter::contains($return_addr) || + // StubRoutines::contains($return_addr), + // "i2c adapter must return to an interpreter frame"); + __ block_comment("verify_i2c { "); + Label L_ok; + if (Interpreter::code() != NULL) + range_check(masm, O7, O0, O1, + Interpreter::code()->code_start(), Interpreter::code()->code_end(), + L_ok); + if (StubRoutines::code1() != NULL) + range_check(masm, O7, O0, O1, + StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), + L_ok); + if (StubRoutines::code2() != NULL) + range_check(masm, O7, O0, O1, + StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), + L_ok); + const char* msg = "i2c adapter must return to an interpreter frame"; + __ block_comment(msg); + __ stop(msg); + __ bind(L_ok); + __ block_comment("} verify_i2ce "); + } + + // As you can see from the list of inputs & outputs there are not a lot + // of temp registers to work with: mostly G1, G3 & G4. + + // Inputs: + // G2_thread - TLS + // G5_method - Method oop + // G4 (Gargs) - Pointer to interpreter's args + // O0..O4 - free for scratch + // O5_savedSP - Caller's saved SP, to be restored if needed + // O6 - Current SP! + // O7 - Valid return address + // L0-L7, I0-I7 - Caller's temps (no frame pushed yet) + + // Outputs: + // G2_thread - TLS + // O0-O5 - Outgoing args in compiled layout + // O6 - Adjusted or restored SP + // O7 - Valid return address + // L0-L7, I0-I7 - Caller's temps (no frame pushed yet) + // F0-F7 - more outgoing args + + + // Gargs is the incoming argument base, and also an outgoing argument. 
+ __ sub(Gargs, BytesPerWord, Gargs); + + // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME + // WITH O7 HOLDING A VALID RETURN PC + // + // | | + // : java stack : + // | | + // +--------------+ <--- start of outgoing args + // | receiver | | + // : rest of args : |---size is java-arg-words + // | | | + // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I + // | | | + // : unused : |---Space for max Java stack, plus stack alignment + // | | | + // +--------------+ <--- SP + 16*wordsize + // | | + // : window : + // | | + // +--------------+ <--- SP + + // WE REPACK THE STACK. We use the common calling convention layout as + // discovered by calling SharedRuntime::calling_convention. We assume it + // causes an arbitrary shuffle of memory, which may require some register + // temps to do the shuffle. We hope for (and optimize for) the case where + // temps are not needed. We may have to resize the stack slightly, in case + // we need alignment padding (32-bit interpreter can pass longs & doubles + // misaligned, but the compilers expect them aligned). + // + // | | + // : java stack : + // | | + // +--------------+ <--- start of outgoing args + // | pad, align | | + // +--------------+ | + // | ints, longs, | | + // | floats, | |---Outgoing stack args. + // : doubles : | First few args in registers. + // | | | + // +--------------+ <--- SP' + 16*wordsize + // | | + // : window : + // | | + // +--------------+ <--- SP' + + // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME + // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP + // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN. + + // Cut-out for having no stack args. Since up to 6 args are passed + // in registers, we will commonly have no stack args. + if (comp_args_on_stack > 0) { + // Convert VMReg stack slots to words. 
+ int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; + // Round up to minimum stack alignment, in wordSize + comp_words_on_stack = align_up(comp_words_on_stack, 2); + // Now compute the distance from Lesp to SP. This calculation does not + // include the space for total_args_passed because Lesp has not yet popped + // the arguments. + __ sub(SP, (comp_words_on_stack)*wordSize, SP); + } + + // Now generate the shuffle code. Pick up all register args and move the + // rest through G1_scratch. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from Lesp+offset. Assume mis-aligned in the + // 32-bit build and aligned in the 64-bit build. Look for the obvious + // ldx/lddf optimizations. + + // Load in argument order going down. + const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize; + set_Rdisp(G1_scratch); + + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { // Pretend stack targets are loaded into F8/F9 + r_1 = F8->as_VMReg(); // as part of the load/store shuffle + if (r_2->is_valid()) r_2 = r_1->next(); + } + if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register()->after_restore(); + if (!r_2->is_valid()) { + __ ld(Gargs, arg_slot(ld_off), r); + } else { + // In V9, longs are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + RegisterOrConstant slot = (sig_bt[i] == T_LONG) ? 
+ next_arg_slot(ld_off) : arg_slot(ld_off); + __ ldx(Gargs, slot, r); + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (!r_2->is_valid()) { + __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister()); + } else { + // In V9, doubles are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. This code also handles longs that + // are passed on the stack, but need a stack-to-stack move through a + // spare float register. + RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? + next_arg_slot(ld_off) : arg_slot(ld_off); + __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister()); + } + } + // Was the argument really intended to be on the stack, but was loaded + // into F8/F9? + if (regs[i].first()->is_stack()) { + assert(r_1->as_FloatRegister() == F8, "fix this code"); + // Convert stack slot to an SP offset + int st_off = reg2offset(regs[i].first()) + STACK_BIAS; + // Store down the shuffled stack word. Target address _is_ aligned. + RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp); + if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot); + else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot); + } + } + + // Jump to the compiled code just as if compiled code was doing it. 
+ __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3); +#if INCLUDE_JVMCI + if (EnableJVMCI) { + // check if this call should be routed towards a specific entry point + __ ld(Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), G1); + __ cmp(G0, G1); + Label no_alternative_target; + __ br(Assembler::equal, false, Assembler::pn, no_alternative_target); + __ delayed()->nop(); + + __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()), G3); + __ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); + + __ bind(no_alternative_target); + } +#endif // INCLUDE_JVMCI + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. 
+ Address callee_target_addr(G2_thread, JavaThread::callee_target_offset()); + __ st_ptr(G5_method, callee_target_addr); + __ jmpl(G3, 0, G0); + __ delayed()->nop(); +} + +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + AdapterGenerator agen(masm); + agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know G5 holds the Method*. The + // args start out packed in the compiled layout. They need to be unpacked + // into the interpreter layout. This will almost always require some stack + // space. We grow the current (compiled) stack, then repack the args. We + // finally end in a jump to the generic interpreter entry point. On exit + // from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relies solely on SP and not FP, get sick). 
+ + address c2i_unverified_entry = __ pc(); + Label L_skip_fixup; + { + Register R_temp = G1; // another scratch register + + AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); + + __ verify_oop(O0); + __ load_klass(O0, G3_scratch); + + __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp); + __ cmp(G3_scratch, R_temp); + + Label ok, ok2; + __ brx(Assembler::equal, false, Assembler::pt, ok); + __ delayed()->ld_ptr(G5_method, CompiledICHolder::holder_metadata_offset(), G5_method); + __ jump_to(ic_miss, G3_scratch); + __ delayed()->nop(); + + __ bind(ok); + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch); + __ bind(ok2); + __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup); + __ delayed()->nop(); + __ jump_to(ic_miss, G3_scratch); + __ delayed()->nop(); + + } + + address c2i_entry = __ pc(); + AdapterGenerator agen(masm); + agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + +} + +// Helper function for native calling conventions +static VMReg int_stk_helper( int i ) { + // Bias any stack based VMReg we get by ignoring the window area + // but not the register parameter save area. + // + // This is strange for the following reasons. We'd normally expect + // the calling convention to return an VMReg for a stack slot + // completely ignoring any abi reserved area. C2 thinks of that + // abi area as only out_preserve_stack_slots. This does not include + // the area allocated by the C abi to store down integer arguments + // because the java calling convention does not use it. 
So + // since c2 assumes that there are only out_preserve_stack_slots + // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack + // location the c calling convention must add in this bias amount + // to make up for the fact that the out_preserve_stack_slots is + // insufficient for C calls. What a mess. I sure hope those 6 + // stack words were worth it on every java call! + + // Another way of cleaning this up would be for out_preserve_stack_slots + // to take a parameter to say whether it was C or java calling conventions. + // Then things might look a little better (but not much). + + int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM; + if( mem_parm_offset < 0 ) { + return as_oRegister(i)->as_VMReg(); + } else { + int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word; + // Now return a biased offset that will be correct when out_preserve_slots is added back in + return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots()); + } +} + + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on sparc"); + + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). + + // The native convention is V8 if !LP64 + // The LP64 convention is the V9 convention which is slightly more sane. + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots(); + + // V9 convention: All things "as-if" on double-wide stack slots. 
+ // Hoist any int/ptr/long's in the first 6 to int regs. + // Hoist any flt/dbl's in the first 16 dbl regs. + int j = 0; // Count of actual args, not HALVES + VMRegPair param_array_reg; // location of the argument in the parameter array + for (int i = 0; i < total_args_passed; i++, j++) { + param_array_reg.set_bad(); + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_CHAR: + case T_INT: + case T_SHORT: + regs[i].set1(int_stk_helper(j)); + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); + case T_ADDRESS: // raw pointers, like current thread, for VM calls + case T_ARRAY: + case T_OBJECT: + case T_METADATA: + regs[i].set2(int_stk_helper(j)); + break; + case T_FLOAT: + // Per SPARC Compliance Definition 2.4.1, page 3P-12 available here + // http://www.sparc.org/wp-content/uploads/2014/01/SCD.2.4.1.pdf.gz + // + // "When a callee prototype exists, and does not indicate variable arguments, + // floating-point values assigned to locations %sp+BIAS+128 through %sp+BIAS+248 + // will be promoted to floating-point registers" + // + // By "promoted" it means that the argument is located in two places, an unused + // spill slot in the "parameter array" (starts at %sp+BIAS+128), and a live + // float register. In most cases, there are 6 or fewer arguments of any type, + // and the standard parameter array slots (%sp+BIAS+128 to %sp+BIAS+176 exclusive) + // serve as shadow slots. Per the spec floating point registers %d6 to %d16 + // require slots beyond that (up to %sp+BIAS+248). 
+ // + { + // V9ism: floats go in ODD registers and stack slots + int float_index = 1 + (j << 1); + param_array_reg.set1(VMRegImpl::stack2reg(float_index)); + if (j < 16) { + regs[i].set1(as_FloatRegister(float_index)->as_VMReg()); + } else { + regs[i] = param_array_reg; + } + } + break; + case T_DOUBLE: + { + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // V9ism: doubles go in EVEN/ODD regs and stack slots + int double_index = (j << 1); + param_array_reg.set2(VMRegImpl::stack2reg(double_index)); + if (j < 16) { + regs[i].set2(as_FloatRegister(double_index)->as_VMReg()); + } else { + // V9ism: doubles go in EVEN/ODD stack slots + regs[i] = param_array_reg; + } + } + break; + case T_VOID: + regs[i].set_bad(); + j--; + break; // Do not count HALVES + default: + ShouldNotReachHere(); + } + // Keep track of the deepest parameter array slot. + if (!param_array_reg.first()->is_valid()) { + param_array_reg = regs[i]; + } + if (param_array_reg.first()->is_stack()) { + int off = param_array_reg.first()->reg2stack(); + if (off > max_stack_slots) max_stack_slots = off; + } + if (param_array_reg.second()->is_stack()) { + int off = param_array_reg.second()->reg2stack(); + if (off > max_stack_slots) max_stack_slots = off; + } + } + return align_up(max_stack_slots + 1, 2); + +} + +int SharedRuntime::vector_calling_convention(VMRegPair *regs, + uint num_bits, + uint total_args_passed) { + Unimplemented(); + return 0; +} + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + switch (ret_type) { + case T_FLOAT: + __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS); + break; + case T_DOUBLE: + __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS); + break; + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType 
ret_type, int frame_slots) { + switch (ret_type) { + case T_FLOAT: + __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0); + break; + case T_DOUBLE: + __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0); + break; + } +} + +// Check for and forward any pending exception. Thread is stored in +// L7_thread_cache and possibly NOT in G2_thread. Since this is a native call, there +// is no exception handler. We merely pop this frame off and throw the +// exception in the caller's frame. +static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) { + Label L; + __ br_null(Rex_oop, false, Assembler::pt, L); + __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception + // Since this is a native call, we *know* the proper exception handler + // without calling into the VM: it's the empty function. Just pop this + // frame and then jump to forward_exception_entry; O7 will contain the + // native caller's return PC. + AddressLiteral exception_entry(StubRoutines::forward_exception_entry()); + __ jump_to(exception_entry, G3_scratch); + __ delayed()->restore(); // Pop this frame off.
+ __ bind(L); +} + +// A simple move of integer like type +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + // stack to reg + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + __ mov(src.first()->as_Register(), dst.first()->as_Register()); + } +} + +// On 64 bit we will store integer like items to the stack as +// 64 bits items (sparc abi) even though java would only store +// 32bits for a parameter. On 32bit it will simply be 32 bits +// So this routine will do 32->32 on 32bit and 32->64 on 64bit +static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); + __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + // stack to reg + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); + } + } else if (dst.first()->is_stack()) { + // reg to stack + // Some compilers (gcc) expect a clean 32 bit value on function entry + __ signx(src.first()->as_Register(), L5); + __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + // Some compilers (gcc) expect a clean 32 bit value on function entry + __ signx(src.first()->as_Register(), dst.first()->as_Register()); + } +} + + +static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5); + __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); + } else { 
+ // stack to reg + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + __ mov(src.first()->as_Register(), dst.first()->as_Register()); + } +} + + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. First figure out the location we use as a handle + + if (src.first()->is_stack()) { + // Oop is already on the stack + Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register(); + __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle); + __ ld_ptr(rHandle, 0, L4); + __ movr( Assembler::rc_z, L4, G0, rHandle ); + if (dst.first()->is_stack()) { + __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); + } + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + } else { + // Oop is in an input register pass we must flush it to the stack + const Register rOop = src.first()->as_Register(); + const Register rHandle = L5; + int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot * VMRegImpl::stack_slot_size; + __ st_ptr(rOop, SP, offset + STACK_BIAS); + if (is_receiver) { + *receiver_offset = offset; + } + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ add(SP, offset + STACK_BIAS, rHandle); + __ movr( Assembler::rc_z, rOop, G0, rHandle ); + + if (dst.first()->is_stack()) { + __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + 
__ mov(rHandle, dst.first()->as_Register()); + } + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack the easiest of the bunch + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + // stack to reg + if (dst.first()->is_Register()) { + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); + } else { + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); + } + } + } else if (dst.first()->is_stack()) { + // reg to stack + if (src.first()->is_Register()) { + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); + } else { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); + } + } else { + // reg to reg + if (src.first()->is_Register()) { + if (dst.first()->is_Register()) { + // gpr -> gpr + __ mov(src.first()->as_Register(), dst.first()->as_Register()); + } else { + // gpr -> fpr + __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS); + __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister()); + } + } else if (dst.first()->is_Register()) { + // fpr -> gpr + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS); + __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register()); + } else { + // fpr -> fpr + // In theory these overlap but the ordering is such that this is likely a nop + if ( src.first() != dst.first()) { + __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); + } + } + } +} + +static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + VMRegPair 
src_lo(src.first()); + VMRegPair src_hi(src.second()); + VMRegPair dst_lo(dst.first()); + VMRegPair dst_hi(dst.second()); + simple_move32(masm, src_lo, dst_lo); + simple_move32(masm, src_hi, dst_hi); +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // Do the simple ones here else do two int moves + if (src.is_single_phys_reg() ) { + if (dst.is_single_phys_reg()) { + __ mov(src.first()->as_Register(), dst.first()->as_Register()); + } else { + // split src into two separate registers + // Remember hi means hi address or lsw on sparc + // Move msw to lsw + if (dst.second()->is_reg()) { + // MSW -> MSW + __ srax(src.first()->as_Register(), 32, dst.first()->as_Register()); + // Now LSW -> LSW + // this will only move lo -> lo and ignore hi + VMRegPair split(dst.second()); + simple_move32(masm, src, split); + } else { + VMRegPair split(src.first(), L4->as_VMReg()); + // MSW -> MSW (lo ie. first word) + __ srax(src.first()->as_Register(), 32, L4); + split_long_move(masm, split, dst); + } + } + } else if (dst.is_single_phys_reg()) { + if (src.is_adjacent_aligned_on_stack(2)) { + __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); + } else { + // dst is a single reg. 
+ // Remember lo is low address not msb for stack slots + // and lo is the "real" register for registers + // src is + + VMRegPair split; + + if (src.first()->is_reg()) { + // src.lo (msw) is a reg, src.hi is stk/reg + // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg] + split.set_pair(dst.first(), src.first()); + } else { + // msw is stack move to L5 + // lsw is stack move to dst.lo (real reg) + // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5 + split.set_pair(dst.first(), L5->as_VMReg()); + } + + // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg) + // msw -> src.lo/L5, lsw -> dst.lo + split_long_move(masm, src, split); + + // So dst now has the low order correct position the + // msw half + __ sllx(split.first()->as_Register(), 32, L5); + + const Register d = dst.first()->as_Register(); + __ or3(L5, d, d); + } + } else { + // For LP64 we can probably do better. + split_long_move(masm, src, dst); + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The painful thing here is that like long_move a VMRegPair might be + // 1: a single physical register + // 2: two physical registers (v8) + // 3: a physical reg [lo] and a stack slot [hi] (v8) + // 4: two stack slots + + // Since src is always a java calling convention we know that the src pair + // is always either all registers or all stack (and aligned?) 
+ + // in a register [lo] and a stack slot [hi] + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack the easiest of the bunch + // ought to be a way to do this where if alignment is ok we use ldd/std when possible + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); + } else { + // stack to reg + if (dst.second()->is_stack()) { + // stack -> reg, stack -> stack + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); + if (dst.first()->is_Register()) { + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); + } else { + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); + } + // This was missing. (very rare case) + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); + } else { + // stack -> reg + // Eventually optimize for alignment QQQ + if (dst.first()->is_Register()) { + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register()); + } else { + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister()); + } + } + } + } else if (dst.first()->is_stack()) { + // reg to stack + if (src.first()->is_Register()) { + // Eventually optimize for alignment QQQ + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); + if (src.second()->is_stack()) { + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); + } else { + __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS); + } + } else { + // fpr to stack + if (src.second()->is_stack()) { + 
ShouldNotReachHere(); + } else { + // Is the stack aligned? + if (reg2offset(dst.first()) & 0x7) { + // No do as pairs + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); + __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS); + } else { + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } + } else { + // reg to reg + if (src.first()->is_Register()) { + if (dst.first()->is_Register()) { + // gpr -> gpr + __ mov(src.first()->as_Register(), dst.first()->as_Register()); + __ mov(src.second()->as_Register(), dst.second()->as_Register()); + } else { + // gpr -> fpr + // ought to be able to do a single store + __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS); + __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS); + // ought to be able to do a single load + __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister()); + __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister()); + } + } else if (dst.first()->is_Register()) { + // fpr -> gpr + // ought to be able to do a single store + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS); + // ought to be able to do a single load + // REMEMBER first() is low address not LSB + __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register()); + if (dst.second()->is_Register()) { + __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register()); + } else { + __ ld(FP, -4 + STACK_BIAS, L4); + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); + } + } else { + // fpr -> fpr + // In theory these overlap but the ordering is such that this is likely a nop + if ( src.first() != dst.first()) { + __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); + } + } + } +} + +// Creates an inner frame if one hasn't already been created, and +// saves a copy 
of the thread in L7_thread_cache +static void create_inner_frame(MacroAssembler* masm, bool* already_created) { + if (!*already_created) { + __ save_frame(0); + // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below + // Don't use save_thread because it smashes G2 and we merely want to save a + // copy + __ mov(G2_thread, L7_thread_cache); + *already_created = true; + } +} + + +// Unpack an array argument into a pointer to the body and the length +// if the array is non-null, otherwise pass 0 for both. +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { + // Pass the length, ptr pair + Label is_null, done; + if (reg.first()->is_stack()) { + VMRegPair tmp = reg64_to_VMRegPair(L2); + // Load the arg up from the stack + move_ptr(masm, reg, tmp); + reg = tmp; + } + __ cmp(reg.first()->as_Register(), G0); + __ brx(Assembler::equal, false, Assembler::pt, is_null); + __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4); + move_ptr(masm, reg64_to_VMRegPair(L4), body_arg); + __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4); + move32_64(masm, reg64_to_VMRegPair(L4), length_arg); + __ ba_short(done); + __ bind(is_null); + // Pass zeros + move_ptr(masm, reg64_to_VMRegPair(G0), body_arg); + move32_64(masm, reg64_to_VMRegPair(G0), length_arg); + __ bind(done); +} + +// When VerifyOops is set, emit a verify_oop for each reference-typed argument +// among the first method->size_of_parameters() entries of sig_bt/regs; +// stack-passed arguments are first loaded into G5_method, used here as a temp. +static void verify_oop_args(MacroAssembler* masm, + const methodHandle& method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = G5_method; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (is_reference_type(sig_bt[i])) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; + ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg); + __ ld_ptr(SP,
ld_off, temp_reg); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +// Emit the compiled-entry dispatch for a method handle intrinsic: verify the +// oop arguments, locate the trailing MemberName argument (when the intrinsic +// has a ref_kind) and/or the receiver (argument 0), then hand both registers +// to MethodHandles::generate_method_handle_dispatch. Any intrinsic id other +// than a signature-polymorphic linker or _invokeBasic is fatal. +static void gen_special_dispatch(MacroAssembler* masm, + const methodHandle& method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = G5_method; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; + ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); + __ ld_ptr(SP, ld_off, member_reg); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack.
+ fatal("receiver always in a register"); + receiver_reg = G3_scratch; // known to be free at this point + RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; + // Use receiver_reg itself as the offset temp (as the member path does + // with member_reg): member_reg is noreg for _invokeBasic and otherwise + // already holds the MemberName, so it must not be used as scratch here. + ld_off = __ ensure_simm13_or_reg(ld_off, receiver_reg); + __ ld_ptr(SP, ld_off, receiver_reg); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +// +// Critical native functions are a shorthand for the use of +// GetPrimitiveArrayCritical and disallow the use of any other JNI +// functions. The wrapper is expected to unpack the arguments before +// passing them to the callee and perform checks before and after the +// native call to ensure that the GCLocker +// lock_critical/unlock_critical semantics are followed. Some other +// parts of JNI setup are skipped like the tear down of the JNI handle +// block and the check for pending exceptions, since it's impossible for them +// to be thrown. +// +// They are roughly structured like this: +// if (GCLocker::needs_gc()) +// SharedRuntime::block_for_jni_critical(); +// transition to thread_in_native +// unpack array arguments and call native entry point +// check for safepoint in progress +// check if any thread suspend flags are set +// call into JVM and possibly unlock the JNI critical +// if a GC was suppressed while in the critical native.
+// transition back to thread_in_Java +// return to caller +// +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + const methodHandle& method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type, + address critical_entry) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + bool is_critical_native = true; + address native_func = critical_entry; + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // Native nmethod wrappers never take possession of the oop arguments. + // So the caller will gc the arguments. 
The only thing we need an + // oopMap for is if the call is static + // + // An OopMap for lock (and class if static), and one for the VM call itself + OopMapSet *oop_maps = new OopMapSet(); + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + { + Label L; + const Register temp_reg = G3_scratch; + AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); + __ verify_oop(O0); + __ load_klass(O0, temp_reg); + __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); + + __ jump_to(ic_miss, temp_reg); + __ delayed()->nop(); + __ align(CodeEntryAlignment); + __ bind(L); + } + + int vep_offset = ((intptr_t)__ pc()) - start; + +#ifdef COMPILER1 + if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { + // Object.hashCode, System.identityHashCode can pull the hashCode from the + // header word instead of doing a full VM transition once it's been computed. + // Since hashCode is usually polymorphic at call sites we can't do this + // optimization at the call site without a lot of work. + Label slowCase; + Label done; + Register obj_reg = O0; + Register result = O0; + Register header = G3_scratch; + Register hash = G3_scratch; // overwrite header value with hash value + Register mask = G1; // to get hash field from header + + // Unlike for Object.hashCode, System.identityHashCode is static method and + // gets object as argument instead of the receiver. + if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) { + assert(method->is_static(), "method should be static"); + // return 0 for null reference input + __ br_null(obj_reg, false, Assembler::pn, done); + __ delayed()->mov(obj_reg, hash); + } + + // Read the header and build a mask to get its hash field. Give up if the object is not unlocked. 
+ // We depend on hash_mask being at most 32 bits and avoid the use of + // hash_mask_in_place because it could be larger than 32 bits in a 64-bit + // vm: see markWord.hpp. + __ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header); + __ sethi(markWord::hash_mask, mask); + __ btst(markWord::unlocked_value, header); + __ br(Assembler::zero, false, Assembler::pn, slowCase); + if (UseBiasedLocking) { + // Check if biased and fall through to runtime if so + __ delayed()->nop(); + __ btst(markWord::biased_lock_bit_in_place, header); + __ br(Assembler::notZero, false, Assembler::pn, slowCase); + } + __ delayed()->or3(mask, markWord::hash_mask & 0x3ff, mask); + + // Check for a valid (non-zero) hash code and get its value. + __ srlx(header, markWord::hash_shift, hash); + __ andcc(hash, mask, hash); + __ br(Assembler::equal, false, Assembler::pn, slowCase); + __ delayed()->nop(); + + // leaf return. + __ bind(done); + __ retl(); + __ delayed()->mov(hash, result); + __ bind(slowCase); + } +#endif // COMPILER1 + + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + int total_save_slots = 6 * VMRegImpl::slots_per_word; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + // These have to be saved and restored across the safepoint + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + ss.skip_array_prefix(1); // skip one '[' + assert(ss.is_primitive(), "primitive type expected"); + in_elem_bt[i] = ss.type(); + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type() || + in_sig_bt[i] == T_ARRAY, "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. 
+ // + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. + int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_ARRAY: + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: assert(reg->is_in(), "don't need to save these"); break; + case T_LONG: if (reg->is_global()) double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + single_slots; + } + + // Compute framesize for the wrapper. We need to handlize all oops in + // registers. We must create space for them here that is disjoint from + // the windowed save area because we have no control over when we might + // flush the window again and overwrite values that gc has since modified. + // (The live window race) + // + // We always just allocate 6 word for storing down these object. This allow + // us to simply record the base and use the Ireg number to decide which + // slot to use. (Note that the reg number is the inbound number not the + // outbound number). + // We must shuffle args to match the native convention, and include var-args space. + + // Calculate the total number of stack slots we will need. 
+ + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + + int oop_handle_offset = align_up(stack_slots, 2); + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place to save return value or as a temporary for any gpr -> fpr moves + stack_slots += 2; + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | vararg area | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = align_up(stack_slots, 2 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + // Generate stack overflow check before creating frame + __ generate_stack_overflow_check(stack_size); + + // Generate a new frame for the wrapper. 
+ __ save(SP, -stack_size, SP); + + int frame_complete = ((intptr_t)__ pc()) - start; + + __ verify_thread(); + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // (derived from JavaThread* which is in L7_thread_cache) and, if static, + // the class mirror instead of a receiver. This pretty much guarantees that + // register layout will not match. We ignore these extra arguments during + // the shuffle. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + // Because we have a new window and the argument registers are completely + // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about + // here. + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. + // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + // Record sp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // We move the arguments backward because the floating point registers + // destination will always be to a register with a greater or equal register + // number or the stack. 
+ +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) { + +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true; + } +#endif /* ASSERT */ + + switch (in_sig_bt[i]) { + case T_ARRAY: + if (is_critical_native) { + unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]); + c_arg--; + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + double_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, in_regs[i], out_regs[c_arg]); + } + } + + // 
Pre-load a static method's oop into O1. Used both by locking code and + // the normal JNI call code. + if (method->is_static() && !is_critical_native) { + __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1); + + // Now handlize the static class mirror in O1. It's known not-null. + __ st_ptr(O1, SP, klass_offset + STACK_BIAS); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + __ add(SP, klass_offset + STACK_BIAS, O1); + } + + + const Register L6_handle = L6; + + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + __ mov(O1, L6_handle); + } + + // We have all of the arguments setup at this point. We MUST NOT touch any Oregs + // except O6/O7. So if we must call out we must push a new frame. We immediately + // push a new frame and flush the windows. + intptr_t thepc = (intptr_t) __ pc(); + { + address here = __ pc(); + // Call the next instruction + __ call(here + 8, relocInfo::none); + __ delayed()->nop(); + } + + // We use the same pc/oopMap repeatedly when we call out + oop_maps->add_gc_map(thepc - start, map); + + // O7 now has the pc loaded that we will use when we finally call to native. 
+ + // Save thread in L7; it crosses a bunch of VM calls below + // Don't use save_thread because it smashes G2 and we merely + // want to save a copy + __ mov(G2_thread, L7_thread_cache); + + + // If we create an inner frame once is plenty + // when we create it we must also save G2_thread + bool inner_frame_created = false; + + // dtrace method entry support + { + SkipIfEqual skip_if( + masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); + // create inner frame + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + __ set_metadata_constant(method(), O1); + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + G2_thread, O1); + __ restore(); + } + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + // create inner frame + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + __ set_metadata_constant(method(), O1); + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + G2_thread, O1); + __ restore(); + } + + // We are in the jni frame unless saved_frame is true in which case + // we are in one frame deeper (the "inner" frame). If we are in the + // "inner" frames the args are in the Iregs and if the jni frame then + // they are in the Oregs. + // If we ever need to go to the VM (for locking, jvmti) then + // we will always be in the "inner" frame. 
+ + // Lock a synchronized method + int lock_offset = -1; // Set if locked + if (method->is_synchronized()) { + Register Roop = O1; + const Register L3_box = L3; + + create_inner_frame(masm, &inner_frame_created); + + __ ld_ptr(I1, 0, O1); + Label done; + + lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); + __ add(FP, lock_offset+STACK_BIAS, L3_box); +#ifdef ASSERT + if (UseBiasedLocking) { + // making the box point to itself will make it clear it went unused + // but also be obviously invalid + __ st_ptr(L3_box, L3_box, 0); + } +#endif // ASSERT + // + // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch + // + __ compiler_lock_object(Roop, L1, L3_box, L2); + __ br(Assembler::equal, false, Assembler::pt, done); + __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box); + + + // None of the above fast optimizations worked so we have to get into the + // slow case of monitor enter. Inline a special case of call_VM that + // disallows any pending_exception. + __ mov(Roop, O0); // Need oop in O0 + __ mov(L3_box, O1); + + // Record last_Java_sp, in case the VM code releases the JVM lock. 
+ + __ set_last_Java_frame(FP, I7); + + // do the call + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ delayed()->mov(L7_thread_cache, O2); + + __ restore_thread(L7_thread_cache); // restore G2_thread + __ reset_last_Java_frame(); + +#ifdef ASSERT + { Label L; + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); + __ br_null_short(O0, Assembler::pt, L); + __ stop("no pending exception allowed on exit from IR::monitorenter"); + __ bind(L); + } +#endif + __ bind(done); + } + + + // Finally just about ready to make the JNI call + + __ flushw(); + if (inner_frame_created) { + __ restore(); + } else { + // Store only what we need from this frame + // QQQ I think that non-v9 (like we care) we don't need these saves + // either as the flush traps and the current window goes too. + __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS); + __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS); + } + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0); + } + + // Use that pc we placed in O7 a while back as the current frame anchor + __ set_last_Java_frame(SP, O7); + + // We flushed the windows ages ago now mark them as flushed before transitioning. + __ set(JavaFrameAnchor::flushed, G3_scratch); + __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); + + if (!is_critical_native) { + // Transition from _thread_in_Java to _thread_in_native. + __ set(_thread_in_native, G3_scratch); + } + + AddressLiteral dest(native_func); + __ relocate(relocInfo::runtime_call_type); + __ jumpl_to(dest, O7, O7); + __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); + + __ restore_thread(L7_thread_cache); // restore G2_thread + + // Unpack native results. 
For int-types, we do any needed sign-extension + // and move things into I0. The return value there will survive any VM + // calls for blocking or unlocking. An FP or OOP result (handle) is done + // specially in the slow-path code. + switch (ret_type) { + case T_VOID: break; // Nothing to do! + case T_FLOAT: break; // Got it where we want it (unless slow-path) + case T_DOUBLE: break; // Got it where we want it (unless slow-path) + // In 64 bits build result is in O0, in O0, O1 in 32bit build + case T_LONG: + // Fall thru + case T_OBJECT: // Really a handle + case T_ARRAY: + case T_INT: + __ mov(O0, I0); + break; + case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false + case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break; + case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value! + case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break; + break; // Cannot de-handlize until after reclaiming jvm_lock + default: + ShouldNotReachHere(); + } + + Label after_transition; + + // If this is a critical native, check for a safepoint or suspend request after the call. + // If a safepoint is needed, transition to native, then to native_trans to handle + // safepoints like the native methods that are not critical natives. + if (is_critical_native) { + Label needs_safepoint; + Address suspend_state(G2_thread, JavaThread::suspend_flags_offset()); + __ safepoint_poll(needs_safepoint, false, G2_thread, G3_scratch); + __ delayed()->ld(suspend_state, G3_scratch); + __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, after_transition); + __ bind(needs_safepoint); + } + + // must we block? + + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after blocking. + { Label no_block; + + // Switch thread to "native transition" state before reading the synchronization state. 
+ // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ set(_thread_in_native_trans, G3_scratch); + __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); + + // Force this write out before the read below + __ membar(Assembler::StoreLoad); + + Label L; + Address suspend_state(G2_thread, JavaThread::suspend_flags_offset()); + __ safepoint_poll(L, false, G2_thread, G3_scratch); + __ delayed()->ld(suspend_state, G3_scratch); + __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block); + __ bind(L); + + // Block. Save any potential method result value before the operation and + // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this + // lets us share the oopMap we used when we went native rather the create + // a distinct one for this pc + // + save_native_result(masm, ret_type, stack_slots); + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + G2_thread); + + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + __ bind(no_block); + } + + // thread state is thread_in_native_trans. Any safepoint blocking has already + // happened so we can now change state to _thread_in_Java. 
+ __ set(_thread_in_Java, G3_scratch); + __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); + __ bind(after_transition); + + Label no_reguard; + __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch); + __ cmp_and_br_short(G3_scratch, StackOverflow::stack_guard_yellow_reserved_disabled, Assembler::notEqual, Assembler::pt, no_reguard); + + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ delayed()->nop(); + + __ restore_thread(L7_thread_cache); // restore G2_thread + restore_native_result(masm, ret_type, stack_slots); + + __ bind(no_reguard); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live in freg or I0 (and I1 if long and 32bit vm) + + // Unlock + if (method->is_synchronized()) { + Label done; + Register I2_ex_oop = I2; + const Register L3_box = L3; + // Get locked oop from the handle we passed to jni + __ ld_ptr(L6_handle, 0, L4); + __ add(SP, lock_offset+STACK_BIAS, L3_box); + // Must save pending exception around the slow-path VM call. Since it's a + // leaf call, the pending exception (if any) can be kept in a register. + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop); + // Now unlock + // (Roop, Rmark, Rbox, Rscratch) + __ compiler_unlock_object(L4, L1, L3_box, L2); + __ br(Assembler::equal, false, Assembler::pt, done); + __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box); + + // save and restore any potential method result value around the unlocking + // operation. Will save in I0 (or stack for FP returns). + save_native_result(masm, ret_type, stack_slots); + + // Must clear pending-exception before re-entering the VM. Since this is + // a leaf call, pending-exception-oop can be safely kept in a register. + __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset())); + + // slow case of monitor enter. 
Inline a special case of call_VM that + // disallows any pending_exception. + __ mov(L3_box, O1); + + // Pass in current thread pointer + __ mov(G2_thread, O2); + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type); + __ delayed()->mov(L4, O0); // Need oop in O0 + + __ restore_thread(L7_thread_cache); // restore G2_thread + +#ifdef ASSERT + { Label L; + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); + __ br_null_short(O0, Assembler::pt, L); + __ stop("no pending exception allowed on exit from IR::monitorexit"); + __ bind(L); + } +#endif + restore_native_result(masm, ret_type, stack_slots); + // check_forward_pending_exception jump to forward_exception if any pending + // exception is set. The forward_exception routine expects to see the + // exception in pending_exception and not in a register. Kind of clumsy, + // since all folks who branch to forward_exception must have tested + // pending_exception first and hence have it in a register already. + __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset())); + __ bind(done); + } + + // Tell dtrace about this method exit + { + SkipIfEqual skip_if( + masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); + save_native_result(masm, ret_type, stack_slots); + __ set_metadata_constant(method(), O1); + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + G2_thread, O1); + restore_native_result(masm, ret_type, stack_slots); + } + + // Clear "last Java frame" SP and PC. + __ verify_thread(); // G2_thread must be correct + __ reset_last_Java_frame(); + + // Unbox oop result, e.g. JNIHandles::resolve value in I0. 
+ if (is_reference_type(ret_type)) { + __ resolve_jobject(I0, G3_scratch); + } + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ st_ptr(G0, G2_thread, JavaThread::pending_jni_exception_check_fn_offset()); + } + + if (!is_critical_native) { + // reset handle block + __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5); + __ st(G0, L5, JNIHandleBlock::top_offset_in_bytes()); + + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch); + check_forward_pending_exception(masm, G3_scratch); + } + + + // Return + + __ ret(); + __ delayed()->restore(); + + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_offset), + oop_maps); + + return nm; +} + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + assert(callee_locals >= callee_parameters, + "test and remove; got more parms than locals"); + if (callee_locals < callee_parameters) + return 0; // No adjustment for negative locals + int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; + return align_up(diff, WordsPerLong); +} + +// "Top of Stack" slots that may be unused by the calling convention but must +// otherwise be preserved. +// On Intel these are not necessary and the value can be zero. +// On Sparc this describes the words reserved for storing a register window +// when an interrupt occurs. 
+uint SharedRuntime::out_preserve_stack_slots() {
+  // Register save area size in words, converted to VMReg stack slots.
+  return frame::register_save_words * VMRegImpl::slots_per_word;
+}
+
+// Emit code that pushes one new frame, shared by deopt and uncommon trap.
+//
+//   masm  - assembler receiving the generated code
+//   deopt - when true, also copy the two return-value registers (O0/O1) from
+//           the previous window into the new one
+//
+// The emitted code reads the next frame size through O3array and the next pc
+// through G3pcs, advances G3pcs by one word and decrements the remaining
+// frame count in O4array_size.  O3array is not advanced here.
+static void gen_new_frame(MacroAssembler* masm, bool deopt) {
+//
+// Common out the new frame generation for deopt and uncommon trap
+//
+  Register        G3pcs              = G3_scratch; // Array of new pcs (input)
+  Register        Oreturn0           = O0;
+  Register        Oreturn1           = O1;
+  Register        O2UnrollBlock      = O2;
+  Register        O3array            = O3;         // Array of frame sizes (input)
+  Register        O4array_size       = O4;         // number of frames (input)
+  Register        O7frame_size       = O7;         // size of the frame being pushed (loaded from O3array)
+
+  __ ld_ptr(O3array, 0, O7frame_size);
+  __ sub(G0, O7frame_size, O7frame_size);          // negate: save() takes a negative delta
+  __ save(SP, O7frame_size, SP);
+  __ ld_ptr(G3pcs, 0, I7);                         // load frame's new pc
+
+  #ifdef ASSERT
+  // make sure that the frames are aligned properly
+  // NOTE(review): no check is actually emitted here.
+  #endif
+
+  // Deopt needs to pass some extra live values from frame to frame
+
+  if (deopt) {
+    __ mov(Oreturn0->after_save(), Oreturn0);
+    __ mov(Oreturn1->after_save(), Oreturn1);
+  }
+
+  // Carry the loop state across the register window switch.
+  __ mov(O4array_size->after_save(), O4array_size);
+  __ sub(O4array_size, 1, O4array_size);
+  __ mov(O3array->after_save(), O3array);
+  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
+  __ add(G3pcs, wordSize, G3pcs);                  // point to next pc value
+
+  #ifdef ASSERT
+  // trash registers to show a clear pattern in backtraces
+  __ set(0xDEAD0000, I0);
+  __ add(I0,  2, I1);
+  __ add(I0,  4, I2);
+  __ add(I0,  6, I3);
+  __ add(I0,  8, I4);
+  // Don't touch I5 could have valuable savedSP
+  __ set(0xDEADBEEF, L0);
+  __ mov(L0, L1);
+  __ mov(L0, L2);
+  __ mov(L0, L3);
+  __ mov(L0, L4);
+  __ mov(L0, L5);
+
+  // trash the return value as there is nothing to return yet
+  __ set(0xDEAD0001, O7);
+  #endif
+
+  __ mov(SP, O5_savedSP);
+}
+
+
+// Emit the loop that pushes one frame per entry described by the UnrollBlock
+// whose address is in O2.
+//
+//   masm  - assembler receiving the generated code
+//   deopt - forwarded to gen_new_frame
+//
+// After the loop the final frame's pc is loaded into O7.
+static void make_new_frames(MacroAssembler* masm, bool deopt) {
+  //
+  // loop through the UnrollBlock info and create new frames
+  //
+  Register        G3pcs              = G3_scratch;
+  Register        O2UnrollBlock      = O2;
+  Register        O3array            = O3;
+  Register        O4array_size       = O4;
+  Label           loop;
+
+#ifdef ASSERT
+  // Compilers generate code that bang the stack by as much as the
+  // interpreter would need. So this stack banging should never
+  // trigger a fault. Verify that it does not on non product builds.
+  // Get total frame size for interpreted frames
+  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
+  __ bang_stack_size(O4, O3, G3_scratch);
+#endif
+
+  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
+  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
+  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);
+
+  // Adjust old interpreter frame to make space for new frame's extra java locals
+  //
+  // We capture the original sp for the transition frame only because it is needed in
+  // order to properly calculate interpreter_sp_adjustment. Even though in real life
+  // every interpreter frame captures a savedSP it is only needed at the transition
+  // (fortunately). If we had to have it correct everywhere then we would need to
+  // be told the sp_adjustment for each frame we create. If the frame size array
+  // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
+  // for each frame we create and keep up the illusion every where.
+  //
+
+  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
+  __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
+  __ sub(SP, O7, SP);
+
+#ifdef ASSERT
+  // make sure that there is at least one entry in the array
+  __ tst(O4array_size);
+  __ breakpoint_trap(Assembler::zero, Assembler::icc);
+#endif
+
+  // Now push the new interpreter frames
+  __ bind(loop);
+
+  // allocate a new frame, filling the registers
+
+  gen_new_frame(masm, deopt);        // allocate an interpreter frame
+
+  // Loop while frames remain; the delay slot advances the frame-size pointer.
+  __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
+  __ delayed()->add(O3array, wordSize, O3array);
+  __ ld_ptr(G3pcs, 0, O7);                       // load final frame new pc
+
+}
+
+//------------------------------generate_deopt_blob----------------------------
+// Ought to generate an ideal graph & compile, but here's some SPARC ASM
+// instead.
+void SharedRuntime::generate_deopt_blob() {
+  // allocate space for the code
+  ResourceMark rm;
+  // setup code generation tools
+  int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
+#ifdef ASSERT
+  pad += (StackOverflow::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
+#endif
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    pad += 1000; // Increase the buffer size when compiling for JVMCI
+  }
+#endif
+  CodeBuffer buffer("deopt_blob", 2100+pad, 512);
+  MacroAssembler* masm               = new MacroAssembler(&buffer);
+  FloatRegister   Freturn0           = F0;
+  Register        Greturn1           = G1;
+  Register        Oreturn0           = O0;
+  Register        Oreturn1           = O1;
+  Register        O2UnrollBlock      = O2;
+  Register        L0deopt_mode       = L0;
+  Register        G4deopt_mode       = G4_scratch;
+  int             frame_size_words;
+  Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
+  Label           cont;
+
+  OopMapSet *oop_maps = new OopMapSet();
+
+  //
+  // This is the entry point for code which is returning to a de-optimized
+  // frame.
+ // The steps taken by this frame are as follows: + // - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1) + // and all potentially live registers (at a pollpoint many registers can be live). + // + // - call the C routine: Deoptimization::fetch_unroll_info (this function + // returns information about the number and size of interpreter frames + // which are equivalent to the frame which is being deoptimized) + // - deallocate the unpack frame, restoring only results values. Other + // volatile registers will now be captured in the vframeArray as needed. + // - deallocate the deoptimization frame + // - in a loop using the information returned in the previous step + // push new interpreter frames (take care to propagate the return + // values through each new frame pushed) + // - create a dummy "unpack_frame" and save the return values (O0, O1, F0) + // - call the C routine: Deoptimization::unpack_frames (this function + // lays out values on the interpreter frame which was just created) + // - deallocate the dummy unpack_frame + // - ensure that all the return values are correctly set and then do + // a return to the interpreter entry point + // + // Refer to the following methods for more information: + // - Deoptimization::fetch_unroll_info + // - Deoptimization::unpack_frames + + OopMap* map = NULL; + + int start = __ offset(); + + // restore G2, the trampoline destroyed it + __ get_thread(); + + // On entry we have been called by the deoptimized nmethod with a call that + // replaced the original call (or safepoint polling location) so the deoptimizing + // pc is now in O7. 
Return values are still in the expected places + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + __ ba(cont); + __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode); + + +#if INCLUDE_JVMCI + Label after_fetch_unroll_info_call; + int implicit_exception_uncommon_trap_offset = 0; + int uncommon_trap_offset = 0; + + if (EnableJVMCI) { + masm->block_comment("BEGIN implicit_exception_uncommon_trap"); + implicit_exception_uncommon_trap_offset = __ offset() - start; + + __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()), O7); + __ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); + __ add(O7, -8, O7); + + uncommon_trap_offset = __ offset() - start; + + // Save everything in sight. + (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + __ set_last_Java_frame(SP, NULL); + + __ ld(G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()), O1); + __ sub(G0, 1, L1); + __ st(L1, G2_thread, in_bytes(JavaThread::pending_deoptimization_offset())); + + __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode); + __ mov(G2_thread, O0); + __ mov(L0deopt_mode, O2); + __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)); + __ delayed()->nop(); + oop_maps->add_gc_map( __ offset()-start, map->deep_copy()); + __ get_thread(); + __ add(O7, 8, O7); + __ reset_last_Java_frame(); + + __ ba(after_fetch_unroll_info_call); + __ delayed()->nop(); // Delay slot + masm->block_comment("END implicit_exception_uncommon_trap"); + } // EnableJVMCI +#endif // INCLUDE_JVMCI + + int exception_offset = __ offset() - start; + + // restore G2, the trampoline destroyed it + __ get_thread(); + + // On entry we have been jumped to by the exception handler (or exception_blob + // for server). O0 contains the exception oop and O7 contains the original + // exception pc. 
So if we push a frame here it will look to the + // stack walking code (fetch_unroll_info) just like a normal call so + // state will be extracted normally. + + // save exception oop in JavaThread and fall through into the + // exception_in_tls case since they are handled in same way except + // for where the pending exception is kept. + __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset()); + + // + // Vanilla deoptimization with an exception pending in exception_oop + // + int exception_in_tls_offset = __ offset() - start; + + // No need to update oop_map as each call to save_live_registers will produce identical oopmap + // Opens a new stack frame + (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + + // Restore G2_thread + __ get_thread(); + +#ifdef ASSERT + { + // verify that there is really an exception oop in exception_oop + Label has_exception; + __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception); + __ br_notnull_short(Oexception, Assembler::pt, has_exception); + __ stop("no exception in thread"); + __ bind(has_exception); + + // verify that there is no pending exception + Label no_pending_exception; + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + __ ld_ptr(exception_addr, Oexception); + __ br_null_short(Oexception, Assembler::pt, no_pending_exception); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); + } +#endif + + __ ba(cont); + __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);; + + // + // Reexecute entry, similar to c2 uncommon trap + // + int reexecute_offset = __ offset() - start; +#if INCLUDE_JVMCI && !defined(COMPILER1) + if (EnableJVMCI && UseJVMCICompiler) { + // JVMCI does not use this kind of deoptimization + __ should_not_reach_here(); + } +#endif + // No need to update oop_map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, 0, 
&frame_size_words); + + __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode); + + __ bind(cont); + + __ set_last_Java_frame(SP, noreg); + + // do the call by hand so we can get the oopmap + + __ mov(G2_thread, L7_thread_cache); + __ mov(L0deopt_mode, O1); + __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); + __ delayed()->mov(G2_thread, O0); + + // Set an oopmap for the call site this describes all our saved volatile registers + + oop_maps->add_gc_map( __ offset()-start, map); + + __ mov(L7_thread_cache, G2_thread); + + __ reset_last_Java_frame(); + +#if INCLUDE_JVMCI + if (EnableJVMCI) { + __ bind(after_fetch_unroll_info_call); + } +#endif + // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers + // so this move will survive + + __ mov(L0deopt_mode, G4deopt_mode); + + __ mov(O0, O2UnrollBlock->after_save()); + + RegisterSaver::restore_result_registers(masm); + + __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), G4deopt_mode); + Label noException; + __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException); + + // Move the pending exception from exception_oop to Oexception so + // the pending exception will be picked up the interpreter. 
+ __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception); + __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset())); + __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset())); + __ bind(noException); + + // deallocate the deoptimization frame taking care to preserve the return values + __ mov(Oreturn0, Oreturn0->after_save()); + __ mov(Oreturn1, Oreturn1->after_save()); + __ mov(O2UnrollBlock, O2UnrollBlock->after_save()); + __ restore(); + + // Allocate new interpreter frame(s) and possible c2i adapter frame + + make_new_frames(masm, true); + + // push a dummy "unpack_frame" taking care of float return values and + // call Deoptimization::unpack_frames to have the unpacker layout + // information in the interpreter frames just created and then return + // to the interpreter entry point + __ save(SP, -frame_size_words*wordSize, SP); + __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr); + // LP64 uses g4 in set_last_Java_frame + __ mov(G4deopt_mode, O1); + __ set_last_Java_frame(SP, G0); + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1); + __ reset_last_Java_frame(); + __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0); + + __ ret(); + __ delayed()->restore(); + + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +#if INCLUDE_JVMCI + if (EnableJVMCI) { + _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); + _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); + } +#endif +} + +#ifdef COMPILER2 + +//------------------------------generate_uncommon_trap_blob-------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. 
+void SharedRuntime::generate_uncommon_trap_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + int pad = VerifyThread ? 512 : 0; +#ifdef ASSERT + pad += (StackOverflow::stack_shadow_zone_size() / os::vm_page_size())*16 + 32; +#endif + CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + Register O2UnrollBlock = O2; + Register O2klass_index = O2; + + // + // This is the entry point for all traps the compiler takes when it thinks + // it cannot handle further execution of compiled code. The frame is + // deoptimized in these cases and converted into interpreter frames for + // execution + // The steps taken by this frame are as follows: + // - push a fake "unpack_frame" + // - call the C routine Deoptimization::uncommon_trap (this function + // packs the current compiled frame into vframe arrays and returns + // information about the number and size of interpreter frames which + // are equivalent to the frame which is being deoptimized) + // - deallocate the "unpack_frame" + // - deallocate the deoptimization frame + // - in a loop using the information returned in the previous step + // push interpreter frames; + // - create a dummy "unpack_frame" + // - call the C routine: Deoptimization::unpack_frames (this function + // lays out values on the interpreter frame which was just created) + // - deallocate the dummy unpack_frame + // - return to the interpreter entry point + // + // Refer to the following methods for more information: + // - Deoptimization::uncommon_trap + // - Deoptimization::unpack_frames + + // the unloaded class index is in O0 (first parameter to this blob) + + // push a dummy "unpack_frame" + // and call Deoptimization::uncommon_trap to pack the compiled frame into + // vframe array and return the UnrollBlock information + __ save_frame(0); + __ set_last_Java_frame(SP, noreg); + __ mov(I0, O2klass_index); + __
mov(Deoptimization::Unpack_uncommon_trap, O3); // exec mode + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index, O3); + __ reset_last_Java_frame(); + __ mov(O0, O2UnrollBlock->after_save()); + __ restore(); + + // deallocate the deoptimized frame taking care to preserve the UnrollBlock pointer + __ mov(O2UnrollBlock, O2UnrollBlock->after_save()); + __ restore(); + +#ifdef ASSERT + { Label L; + __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), O1); + __ cmp_and_br_short(O1, Deoptimization::Unpack_uncommon_trap, Assembler::equal, Assembler::pt, L); + __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap"); + __ bind(L); + } +#endif + + // Allocate new interpreter frame(s) and possible c2i adapter frame + + make_new_frames(masm, false); + + // push a dummy "unpack_frame" and + // call Deoptimization::unpack_frames to have the unpacker lay out + // information in the interpreter frames just created and then return + // to the interpreter entry point + __ save_frame(0); + __ set_last_Java_frame(SP, noreg); + __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3); + __ reset_last_Java_frame(); + __ ret(); + __ delayed()->restore(); + + masm->flush(); + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize); +} + +#endif // COMPILER2 + +//------------------------------generate_handler_blob------------------- +// +// Generate a special Compile2Runtime blob that saves all registers, and sets +// up an OopMap. +// +// This blob is jumped to (via a breakpoint and the signal handler) from a +// safepoint in compiled code. On entry to this blob, O7 contains the +// address in the original nmethod at which we should resume normal execution.
+// Thus, this blob looks like a subroutine which must preserve lots of +// registers and return normally. Note that O7 is never register-allocated, +// so it is guaranteed to be free here. +// + +// The hardest part of what this blob must do is to save the 64-bit %o +// registers in the 32-bit build. A simple 'save' turns the %o's to %i's and +// an interrupt will chop off their heads. Making space in the caller's frame +// first will let us save the 64-bit %o's before save'ing, but we cannot hand +// the adjusted FP off to the GC stack-crawler: this will modify the caller's +// SP and mess up HIS OopMaps. So we first adjust the caller's SP, then save +// the 64-bit %o's, then do a save, then fixup the caller's SP (our FP). +// Tricky, tricky, tricky... + +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread) + // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread) + CodeBuffer buffer("handler_blob", 1600, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + int frame_size_words; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + int start = __ offset(); + + bool cause_return = (poll_type == POLL_AT_RETURN); + // If this causes a return before the processing, then do a "restore" + if (cause_return) { + __ restore(); + } else { + // Make it look like we were called via the poll + // so that frame constructor always sees a valid return address + __ ld_ptr(Address(G2_thread, JavaThread::saved_exception_pc_offset()), O7); + __ sub(O7, frame::pc_return_offset, O7); + } + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + + // setup last_Java_sp (blows G4) + __ set_last_Java_frame(SP, noreg); + + Register saved_O7 = O7->after_save(); + if
(!cause_return) { + // Keep a copy of the return pc in L0 to detect if it gets modified + __ mov(saved_O7, L0); + // Adjust and keep a copy of our npc saved by the signal handler + __ ld_ptr(Address(G2_thread, JavaThread::saved_exception_npc_offset()), L1); + __ sub(L1, frame::pc_return_offset, L1); + } + + // call into the runtime to handle illegal instructions exception + // Do not use call_VM_leaf, because we need to make a GC map at this call site. + __ mov(G2_thread, O0); + __ save_thread(L7_thread_cache); + __ call(call_ptr); + __ delayed()->nop(); + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + + oop_maps->add_gc_map( __ offset() - start, map); + + __ restore_thread(L7_thread_cache); + // clear last_Java_sp + __ reset_last_Java_frame(); + + // Check for exceptions + Label pending; + + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1); + __ br_notnull_short(O1, Assembler::pn, pending); + + if (!cause_return) { + // If nobody modified our return pc then we must return to the npc which he saved in L1 + __ cmp(saved_O7, L0); + __ movcc(Assembler::equal, false, Assembler::ptr_cc, L1, saved_O7); + } + + RegisterSaver::restore_live_registers(masm); + + // We are back to the original state on entry and ready to go. + + __ retl(); + __ delayed()->nop(); + + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // We are back to the original state on entry. + + // Tail-call forward_exception_entry, with the issuing PC in O7, + // so it looks like the original nmethod called forward_exception_entry.
+ __ set((intptr_t)StubRoutines::forward_exception_entry(), O0); + __ JMP(O0, 0); + __ delayed()->nop(); + + // ------------- + // make sure all code is generated + masm->flush(); + + // return the safepoint blob + return SafepointBlob::create(&buffer, oop_maps, frame_size_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss) +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread) + // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread) + CodeBuffer buffer(name, 1600, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + int frame_size_words; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + int start = __ offset(); + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + + int frame_complete = __ offset(); + + // setup last_Java_sp (blows G4) + __ set_last_Java_frame(SP, noreg); + + // call into the runtime to resolve the call destination + // Do not use call_VM_leaf, because we need to make a GC map at this call site. + __ mov(G2_thread, O0); + __ save_thread(L7_thread_cache); + __ call(destination, relocInfo::runtime_call_type); + __ delayed()->nop(); + + // O0 contains the address we are going to jump to assuming no exception got installed + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint.
+ + oop_maps->add_gc_map( __ offset() - start, map); + + __ restore_thread(L7_thread_cache); + // clear last_Java_sp + __ reset_last_Java_frame(); + + // Check for exceptions + Label pending; + + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1); + __ br_notnull_short(O1, Assembler::pn, pending); + + // get the returned Method* + + __ get_vm_result_2(G5_method); + __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS); + + // O0 is where we want to jump, overwrite G3 which is saved and scratch + + __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS); + + RegisterSaver::restore_live_registers(masm); + + // We are back to the original state on entry and ready to go. + + __ JMP(G3, 0); + __ delayed()->nop(); + + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // We are back to the original state on entry. + + // Tail-call forward_exception_entry, with the issuing PC in O7, + // so it looks like the original nmethod called forward_exception_entry. + __ set((intptr_t)StubRoutines::forward_exception_entry(), O0); + __ JMP(O0, 0); + __ delayed()->nop(); + + // ------------- + // make sure all code is generated + masm->flush(); + + // return the blob + // frame_size_words or bytes?? + return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); +} + +#ifdef COMPILER2 +RuntimeStub* SharedRuntime::make_native_invoker(address call_target, + int shadow_space_bytes, + const GrowableArray& input_registers, + const GrowableArray& output_registers) { + Unimplemented(); + return nullptr; +} +#endif diff -ur --new-file a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad --- a/src/hotspot/cpu/sparc/sparc.ad 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/sparc.ad 2023-04-16 11:42:11.074882766 +0000 @@ -0,0 +1,10794 @@ +// +// Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// SPARC Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// architecture. +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name ( register save type, C convention save type, +// ideal register type, encoding, vm name ); +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. +// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. 
+// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. + + +// ---------------------------- +// Integer/Long Registers +// ---------------------------- + +// Need to expose the hi/lo aspect of 64-bit registers +// This register set is used for both the 64-bit build and +// the 32-bit build with 1-register longs. + +// Global Registers 0-7 +reg_def R_G0H( NS, NS, Op_RegI,128, G0->as_VMReg()->next()); +reg_def R_G0 ( NS, NS, Op_RegI, 0, G0->as_VMReg()); +reg_def R_G1H(SOC, SOC, Op_RegI,129, G1->as_VMReg()->next()); +reg_def R_G1 (SOC, SOC, Op_RegI, 1, G1->as_VMReg()); +reg_def R_G2H( NS, NS, Op_RegI,130, G2->as_VMReg()->next()); +reg_def R_G2 ( NS, NS, Op_RegI, 2, G2->as_VMReg()); +reg_def R_G3H(SOC, SOC, Op_RegI,131, G3->as_VMReg()->next()); +reg_def R_G3 (SOC, SOC, Op_RegI, 3, G3->as_VMReg()); +reg_def R_G4H(SOC, SOC, Op_RegI,132, G4->as_VMReg()->next()); +reg_def R_G4 (SOC, SOC, Op_RegI, 4, G4->as_VMReg()); +reg_def R_G5H(SOC, SOC, Op_RegI,133, G5->as_VMReg()->next()); +reg_def R_G5 (SOC, SOC, Op_RegI, 5, G5->as_VMReg()); +reg_def R_G6H( NS, NS, Op_RegI,134, G6->as_VMReg()->next()); +reg_def R_G6 ( NS, NS, Op_RegI, 6, G6->as_VMReg()); +reg_def R_G7H( NS, NS, Op_RegI,135, G7->as_VMReg()->next()); +reg_def R_G7 ( NS, NS, Op_RegI, 7, G7->as_VMReg()); + +// Output Registers 0-7 +reg_def R_O0H(SOC, SOC, Op_RegI,136, O0->as_VMReg()->next()); +reg_def 
R_O0 (SOC, SOC, Op_RegI, 8, O0->as_VMReg()); +reg_def R_O1H(SOC, SOC, Op_RegI,137, O1->as_VMReg()->next()); +reg_def R_O1 (SOC, SOC, Op_RegI, 9, O1->as_VMReg()); +reg_def R_O2H(SOC, SOC, Op_RegI,138, O2->as_VMReg()->next()); +reg_def R_O2 (SOC, SOC, Op_RegI, 10, O2->as_VMReg()); +reg_def R_O3H(SOC, SOC, Op_RegI,139, O3->as_VMReg()->next()); +reg_def R_O3 (SOC, SOC, Op_RegI, 11, O3->as_VMReg()); +reg_def R_O4H(SOC, SOC, Op_RegI,140, O4->as_VMReg()->next()); +reg_def R_O4 (SOC, SOC, Op_RegI, 12, O4->as_VMReg()); +reg_def R_O5H(SOC, SOC, Op_RegI,141, O5->as_VMReg()->next()); +reg_def R_O5 (SOC, SOC, Op_RegI, 13, O5->as_VMReg()); +reg_def R_SPH( NS, NS, Op_RegI,142, SP->as_VMReg()->next()); +reg_def R_SP ( NS, NS, Op_RegI, 14, SP->as_VMReg()); +reg_def R_O7H(SOC, SOC, Op_RegI,143, O7->as_VMReg()->next()); +reg_def R_O7 (SOC, SOC, Op_RegI, 15, O7->as_VMReg()); + +// Local Registers 0-7 +reg_def R_L0H( NS, NS, Op_RegI,144, L0->as_VMReg()->next()); +reg_def R_L0 ( NS, NS, Op_RegI, 16, L0->as_VMReg()); +reg_def R_L1H( NS, NS, Op_RegI,145, L1->as_VMReg()->next()); +reg_def R_L1 ( NS, NS, Op_RegI, 17, L1->as_VMReg()); +reg_def R_L2H( NS, NS, Op_RegI,146, L2->as_VMReg()->next()); +reg_def R_L2 ( NS, NS, Op_RegI, 18, L2->as_VMReg()); +reg_def R_L3H( NS, NS, Op_RegI,147, L3->as_VMReg()->next()); +reg_def R_L3 ( NS, NS, Op_RegI, 19, L3->as_VMReg()); +reg_def R_L4H( NS, NS, Op_RegI,148, L4->as_VMReg()->next()); +reg_def R_L4 ( NS, NS, Op_RegI, 20, L4->as_VMReg()); +reg_def R_L5H( NS, NS, Op_RegI,149, L5->as_VMReg()->next()); +reg_def R_L5 ( NS, NS, Op_RegI, 21, L5->as_VMReg()); +reg_def R_L6H( NS, NS, Op_RegI,150, L6->as_VMReg()->next()); +reg_def R_L6 ( NS, NS, Op_RegI, 22, L6->as_VMReg()); +reg_def R_L7H( NS, NS, Op_RegI,151, L7->as_VMReg()->next()); +reg_def R_L7 ( NS, NS, Op_RegI, 23, L7->as_VMReg()); + +// Input Registers 0-7 +reg_def R_I0H( NS, NS, Op_RegI,152, I0->as_VMReg()->next()); +reg_def R_I0 ( NS, NS, Op_RegI, 24, I0->as_VMReg()); +reg_def R_I1H( NS, NS, 
Op_RegI,153, I1->as_VMReg()->next()); +reg_def R_I1 ( NS, NS, Op_RegI, 25, I1->as_VMReg()); +reg_def R_I2H( NS, NS, Op_RegI,154, I2->as_VMReg()->next()); +reg_def R_I2 ( NS, NS, Op_RegI, 26, I2->as_VMReg()); +reg_def R_I3H( NS, NS, Op_RegI,155, I3->as_VMReg()->next()); +reg_def R_I3 ( NS, NS, Op_RegI, 27, I3->as_VMReg()); +reg_def R_I4H( NS, NS, Op_RegI,156, I4->as_VMReg()->next()); +reg_def R_I4 ( NS, NS, Op_RegI, 28, I4->as_VMReg()); +reg_def R_I5H( NS, NS, Op_RegI,157, I5->as_VMReg()->next()); +reg_def R_I5 ( NS, NS, Op_RegI, 29, I5->as_VMReg()); +reg_def R_FPH( NS, NS, Op_RegI,158, FP->as_VMReg()->next()); +reg_def R_FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); +reg_def R_I7H( NS, NS, Op_RegI,159, I7->as_VMReg()->next()); +reg_def R_I7 ( NS, NS, Op_RegI, 31, I7->as_VMReg()); + +// ---------------------------- +// Float/Double Registers +// ---------------------------- + +// Float Registers +reg_def R_F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); +reg_def R_F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); +reg_def R_F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); +reg_def R_F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); +reg_def R_F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); +reg_def R_F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); +reg_def R_F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); +reg_def R_F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); +reg_def R_F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); +reg_def R_F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); +reg_def R_F10( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); +reg_def R_F11( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); +reg_def R_F12( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); +reg_def R_F13( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); +reg_def R_F14( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); +reg_def R_F15( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); +reg_def R_F16( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); +reg_def R_F17( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); +reg_def R_F18( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); +reg_def R_F19( SOC, SOC, 
Op_RegF, 19, F19->as_VMReg()); +reg_def R_F20( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); +reg_def R_F21( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); +reg_def R_F22( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); +reg_def R_F23( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); +reg_def R_F24( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); +reg_def R_F25( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); +reg_def R_F26( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); +reg_def R_F27( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); +reg_def R_F28( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); +reg_def R_F29( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); +reg_def R_F30( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); +reg_def R_F31( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); + +// Double Registers +// The rules of ADL require that double registers be defined in pairs. +// Each pair must be two 32-bit values, but not necessarily a pair of +// single float registers. In each pair, ADLC-assigned register numbers +// must be adjacent, with the lower number even. Finally, when the +// CPU stores such a register pair to memory, the word associated with +// the lower ADLC-assigned number must be stored to the lower address. + +// These definitions specify the actual bit encodings of the sparc +// double fp register numbers. FloatRegisterImpl in register_sparc.hpp +// wants 0-63, so we have to convert every time we want to use fp regs +// with the macroassembler, using reg_to_DoubleFloatRegister_object(). +// 255 is a flag meaning "don't go here". +// I believe we can't handle callee-save doubles D32 and up until +// the place in the sparc stack crawler that asserts on the 255 is +// fixed up. 
+reg_def R_D32 (SOC, SOC, Op_RegD, 1, F32->as_VMReg());
+reg_def R_D32x(SOC, SOC, Op_RegD,255, F32->as_VMReg()->next());
+reg_def R_D34 (SOC, SOC, Op_RegD, 3, F34->as_VMReg());
+reg_def R_D34x(SOC, SOC, Op_RegD,255, F34->as_VMReg()->next());
+reg_def R_D36 (SOC, SOC, Op_RegD, 5, F36->as_VMReg());
+reg_def R_D36x(SOC, SOC, Op_RegD,255, F36->as_VMReg()->next());
+reg_def R_D38 (SOC, SOC, Op_RegD, 7, F38->as_VMReg());
+reg_def R_D38x(SOC, SOC, Op_RegD,255, F38->as_VMReg()->next());
+reg_def R_D40 (SOC, SOC, Op_RegD, 9, F40->as_VMReg());
+reg_def R_D40x(SOC, SOC, Op_RegD,255, F40->as_VMReg()->next());
+reg_def R_D42 (SOC, SOC, Op_RegD, 11, F42->as_VMReg());
+reg_def R_D42x(SOC, SOC, Op_RegD,255, F42->as_VMReg()->next());
+reg_def R_D44 (SOC, SOC, Op_RegD, 13, F44->as_VMReg());
+reg_def R_D44x(SOC, SOC, Op_RegD,255, F44->as_VMReg()->next());
+reg_def R_D46 (SOC, SOC, Op_RegD, 15, F46->as_VMReg());
+reg_def R_D46x(SOC, SOC, Op_RegD,255, F46->as_VMReg()->next());
+reg_def R_D48 (SOC, SOC, Op_RegD, 17, F48->as_VMReg());
+reg_def R_D48x(SOC, SOC, Op_RegD,255, F48->as_VMReg()->next());
+reg_def R_D50 (SOC, SOC, Op_RegD, 19, F50->as_VMReg());
+reg_def R_D50x(SOC, SOC, Op_RegD,255, F50->as_VMReg()->next());
+reg_def R_D52 (SOC, SOC, Op_RegD, 21, F52->as_VMReg());
+reg_def R_D52x(SOC, SOC, Op_RegD,255, F52->as_VMReg()->next());
+reg_def R_D54 (SOC, SOC, Op_RegD, 23, F54->as_VMReg());
+reg_def R_D54x(SOC, SOC, Op_RegD,255, F54->as_VMReg()->next());
+reg_def R_D56 (SOC, SOC, Op_RegD, 25, F56->as_VMReg());
+reg_def R_D56x(SOC, SOC, Op_RegD,255, F56->as_VMReg()->next());
+reg_def R_D58 (SOC, SOC, Op_RegD, 27, F58->as_VMReg());
+reg_def R_D58x(SOC, SOC, Op_RegD,255, F58->as_VMReg()->next());
+reg_def R_D60 (SOC, SOC, Op_RegD, 29, F60->as_VMReg());
+reg_def R_D60x(SOC, SOC, Op_RegD,255, F60->as_VMReg()->next());
+reg_def R_D62 (SOC, SOC, Op_RegD, 31, F62->as_VMReg());
+reg_def R_D62x(SOC, SOC, Op_RegD,255, F62->as_VMReg()->next());
+// Summary of the list above: every D32..D62 register is declared as two
+// entries. The named entry carries an odd encoding value (1, 3, ..., 31);
+// its "x" companion carries 255 and maps to the ->next() VMReg slot.
+
+
+// ----------------------------
+// Special Registers
+// Condition Codes Flag Registers
+// I tried to break out ICC and XCC but it's not very pretty.
+// Every Sparc instruction which defs/kills one also kills the other.
+// Hence every compare instruction which defs one kind of flags ends
+// up needing a kill of the other.
+// None of the flag registers below has a VMReg (VMRegImpl::Bad()).
+reg_def CCR (SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad());
+
+reg_def FCC0(SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad());
+reg_def FCC1(SOC, SOC, Op_RegFlags, 1, VMRegImpl::Bad());
+reg_def FCC2(SOC, SOC, Op_RegFlags, 2, VMRegImpl::Bad());
+reg_def FCC3(SOC, SOC, Op_RegFlags, 3, VMRegImpl::Bad());
+
+// ----------------------------
+// Specify the enum values for the registers. These enums are only used by the
+// OptoReg "class". We can convert these enum values at will to VMReg when needed
+// for visibility to the rest of the vm. The order of this enum influences the
+// register allocator so having the freedom to set this order and not be stuck
+// with the order that is natural for the rest of the vm is worth it.
+// chunk0: the integer registers, each listed as a low/high (H) pair, in the
+// order L, G, O7/SP/O0-O5, I.
+alloc_class chunk0(
+  R_L0,R_L0H, R_L1,R_L1H, R_L2,R_L2H, R_L3,R_L3H, R_L4,R_L4H, R_L5,R_L5H, R_L6,R_L6H, R_L7,R_L7H,
+  R_G0,R_G0H, R_G1,R_G1H, R_G2,R_G2H, R_G3,R_G3H, R_G4,R_G4H, R_G5,R_G5H, R_G6,R_G6H, R_G7,R_G7H,
+  R_O7,R_O7H, R_SP,R_SPH, R_O0,R_O0H, R_O1,R_O1H, R_O2,R_O2H, R_O3,R_O3H, R_O4,R_O4H, R_O5,R_O5H,
+  R_I0,R_I0H, R_I1,R_I1H, R_I2,R_I2H, R_I3,R_I3H, R_I4,R_I4H, R_I5,R_I5H, R_FP,R_FPH, R_I7,R_I7H);
+
+// Note that a register is not allocatable unless it is also mentioned
+// in a widely-used reg_class below. Thus, R_G7 and R_G0 are outside i_reg
+// (the class is spelled int_reg in the declarations below).
+
+// chunk1: the floating point registers, single floats first, then the
+// D32..D62 pairs.
+alloc_class chunk1(
+  // The first registers listed here are those most likely to be used
+  // as temporaries. We move F0..F7 away from the front of the list,
+  // to reduce the likelihood of interferences with parameters and
+  // return values. Likewise, we avoid using F0/F1 for parameters,
+  // since they are used for return values.
+  // This FPU fine-tuning is worth about 1% on the SPEC geomean.
+  R_F8 ,R_F9 ,R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,
+  R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,
+  R_F24,R_F25,R_F26,R_F27,R_F28,R_F29,R_F30,R_F31,
+  R_F0 ,R_F1 ,R_F2 ,R_F3 ,R_F4 ,R_F5 ,R_F6 ,R_F7 , // used for arguments and return values
+  R_D32,R_D32x,R_D34,R_D34x,R_D36,R_D36x,R_D38,R_D38x,
+  R_D40,R_D40x,R_D42,R_D42x,R_D44,R_D44x,R_D46,R_D46x,
+  R_D48,R_D48x,R_D50,R_D50x,R_D52,R_D52x,R_D54,R_D54x,
+  R_D56,R_D56x,R_D58,R_D58x,R_D60,R_D60x,R_D62,R_D62x);
+
+// chunk2: the condition code registers.
+alloc_class chunk2(CCR, FCC0, FCC1, FCC2, FCC3);
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg ( as defined in frame section )
+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+// NOTE(review): the list above jumps from 1) to 3); entry 2) is not present
+// in this file.
+
+// G0 is not included in integer class since it has special meaning.
+reg_class g0_reg(R_G0);
+
+// ----------------------------
+// Integer Register Classes
+// ----------------------------
+// Exclusions from i_reg:
+// R_G0: hardwired zero
+// R_G2: reserved by HotSpot to the TLS register (invariant within Java)
+// R_G6: reserved by Solaris ABI to tools
+// R_G7: reserved by Solaris ABI to libthread
+// R_O7: Used as a temp in many encodings
+reg_class int_reg(R_G1,R_G3,R_G4,R_G5,R_O0,R_O1,R_O2,R_O3,R_O4,R_O5,R_L0,R_L1,R_L2,R_L3,R_L4,R_L5,R_L6,R_L7,R_I0,R_I1,R_I2,R_I3,R_I4,R_I5);
+
+// Class for all integer registers, except the G registers. This is used for
+// encodings which use G registers as temps. The regular inputs to such
+// instructions use a "notemp_" prefix, as a hack to ensure that the allocator
+// will not put an input into a temp register.
+reg_class notemp_int_reg(R_O0,R_O1,R_O2,R_O3,R_O4,R_O5,R_L0,R_L1,R_L2,R_L3,R_L4,R_L5,R_L6,R_L7,R_I0,R_I1,R_I2,R_I3,R_I4,R_I5);
+
+// Integer classes that contain exactly one register each.
+reg_class g1_regI(R_G1);
+reg_class g3_regI(R_G3);
+reg_class g4_regI(R_G4);
+reg_class o0_regI(R_O0);
+reg_class o7_regI(R_O7);
+
+// ----------------------------
+// Pointer Register Classes
+// ----------------------------
+// 64-bit build means 64-bit pointers means hi/lo pairs
+// Every entry below is therefore written as an <reg>H,<reg> pair.
+reg_class ptr_reg( R_G1H,R_G1, R_G3H,R_G3, R_G4H,R_G4, R_G5H,R_G5,
+                   R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5,
+                   R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7,
+                   R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5 );
+// Lock encodings use G3 and G4 internally
+// (so this class is ptr_reg without the G3 and G4 pairs).
+reg_class lock_ptr_reg( R_G1H,R_G1, R_G5H,R_G5,
+                        R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5,
+                        R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7,
+                        R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5 );
+// Special class for storeP instructions, which can store SP or RPC to TLS.
+// It is also used for memory addressing, allowing direct TLS addressing.
+// Compared with ptr_reg it additionally lists the G2, SP and FP pairs.
+reg_class sp_ptr_reg( R_G1H,R_G1, R_G2H,R_G2, R_G3H,R_G3, R_G4H,R_G4, R_G5H,R_G5,
+                      R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5, R_SPH,R_SP,
+                      R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7,
+                      R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5, R_FPH,R_FP );
+// R_L7 is the lowest-priority callee-save (i.e., NS) register
+// We use it to save R_G2 across calls out of Java.
+reg_class l7_regP(R_L7H,R_L7);
+
+// Other special pointer regs
+// (each class holds the hi/lo pair of a single register)
+reg_class g1_regP(R_G1H,R_G1);
+reg_class g2_regP(R_G2H,R_G2);
+reg_class g3_regP(R_G3H,R_G3);
+reg_class g4_regP(R_G4H,R_G4);
+reg_class g5_regP(R_G5H,R_G5);
+reg_class i0_regP(R_I0H,R_I0);
+reg_class o0_regP(R_O0H,R_O0);
+reg_class o1_regP(R_O1H,R_O1);
+reg_class o2_regP(R_O2H,R_O2);
+reg_class o7_regP(R_O7H,R_O7);
+
+
+// ----------------------------
+// Long Register Classes
+// ----------------------------
+// Longs in 1 register. Aligned adjacent hi/lo pairs.
+// Note: O7 is never in this class; it is sometimes used as an encoding temp.
+reg_class long_reg( R_G1H,R_G1, R_G3H,R_G3, R_G4H,R_G4, R_G5H,R_G5
+                   ,R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5
+// 64-bit, longs in 1 register: use all 64-bit integer registers
+                   ,R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7
+                   ,R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5
+                  );
+
+// Long classes that hold the hi/lo pair of a single register.
+// NOTE(review): o7_regL exists although long_reg itself leaves O7 out.
+reg_class g1_regL(R_G1H,R_G1);
+reg_class g3_regL(R_G3H,R_G3);
+reg_class o2_regL(R_O2H,R_O2);
+reg_class o7_regL(R_O7H,R_O7);
+
+// ----------------------------
+// Special Class for Condition Code Flags Register
+reg_class int_flags(CCR);
+reg_class float_flags(FCC0,FCC1,FCC2,FCC3);
+reg_class float_flag0(FCC0);
+
+
+// ----------------------------
+// Float Point Register Classes
+// ----------------------------
+// Skip F30/F31, they are reserved for mem-mem copies
+reg_class sflt_reg(R_F0,R_F1,R_F2,R_F3,R_F4,R_F5,R_F6,R_F7,R_F8,R_F9,R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,R_F24,R_F25,R_F26,R_F27,R_F28,R_F29);
+
+// Paired floating point registers--they show up in the same order as the floats,
+// but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
+reg_class dflt_reg(R_F0, R_F1, R_F2, R_F3, R_F4, R_F5, R_F6, R_F7, R_F8, R_F9, R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,
+                   R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,R_F24,R_F25,R_F26,R_F27,R_F28,R_F29,
+                   /* Use extra V9 double registers; this AD file does not support V8 */
+                   R_D32,R_D32x,R_D34,R_D34x,R_D36,R_D36x,R_D38,R_D38x,R_D40,R_D40x,R_D42,R_D42x,R_D44,R_D44x,R_D46,R_D46x,
+                   R_D48,R_D48x,R_D50,R_D50x,R_D52,R_D52x,R_D54,R_D54x,R_D56,R_D56x,R_D58,R_D58x,R_D60,R_D60x,R_D62,R_D62x
+                   );
+
+// Paired floating point registers--they show up in the same order as the floats,
+// but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
+// This class is usable for mis-aligned loads as happen in I2C adapters.
+// It is dflt_reg without the D32..D62 pairs.
+reg_class dflt_low_reg(R_F0, R_F1, R_F2, R_F3, R_F4, R_F5, R_F6, R_F7, R_F8, R_F9, R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,
+                       R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,R_F24,R_F25,R_F26,R_F27,R_F28,R_F29);
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//        int_def  <name>         ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define  <name>   (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+definitions %{
+// The default cost (of an ALU instruction).
+  int_def DEFAULT_COST ( 100, 100);
+  int_def HUGE_COST (1000000, 1000000);
+
+// Memory refs are twice as expensive as run-of-the-mill.
+  int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2);
+
+// Branches are even more expensive.
+// (calls are given the same cost as branches)
+  int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
+  int_def CALL_COST ( 300, DEFAULT_COST * 3);
+%}
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+source_hpp %{
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ }% and source_hpp %{ }% freely as needed.
+
+#include "oops/klass.inline.hpp"
+
+// Must be visible to the DFA in dfa_sparc.cpp
+extern bool can_branch_register( Node *bol, Node *cmp );
+
+extern bool use_block_zeroing(Node* count);
+
+// Macros to extract hi & lo halves from a long pair.
+// G0 is not part of any long pair, so assert on that.
+// Prevents accidentally using G1 instead of G0.
+// NOTE(review): both macros expand to their argument unchanged and contain
+// no assert, so the description above looks stale.
+#define LONG_HI_REG(x) (x)
+#define LONG_LO_REG(x) (x)
+
+class CallStubImpl {
+
+  //--------------------------------------------------------------
+  //---< Used for optimization in Compile::Shorten_branches >---
+  //--------------------------------------------------------------
+
+ public:
+  // Size of call trampoline stub.
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // number of relocations needed by a call trampoline stub
+  static uint reloc_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+};
+
+// Exception and deopt handler emitters (declared only; defined elsewhere)
+// together with the sizes of the code they emit.
+class HandlerImpl {
+
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  static uint size_exception_handler() {
+    return ( NativeJump::instruction_size ); // sethi;jmp;nop
+  }
+
+  static uint size_deopt_handler() {
+    return ( 4+ NativeJump::instruction_size ); // save;sethi;jmp;restore
+  }
+};
+
+// No extra node flags are added here: _last_flag is Node::_last_flag.
+class Node::PD {
+public:
+  enum NodeFlags {
+    _last_flag = Node::_last_flag
+  };
+};
+
+%}
+
+source %{
+// Shorthand so that "__ insn(...)" calls insn on the local _masm object.
+#define __ _masm.
+
+// tertiary op of a LoadP or StoreP encoding
+#define REGP_OP true
+
+static FloatRegister reg_to_SingleFloatRegister_object(int register_encoding);
+static FloatRegister reg_to_DoubleFloatRegister_object(int register_encoding);
+static Register reg_to_register_object(int register_encoding);
+
+// Empty body: no mach node analysis is performed.
+void PhaseOutput::pd_perform_mach_node_analysis() {
+}
+
+// Always 1.
+int MachNode::pd_alignment_required() const {
+  return 1;
+}
+
+// Always 0; current_offset is ignored.
+int MachNode::compute_padding(int current_offset) const {
+  return 0;
+}
+
+// Originally this was guarded internally by BranchOnRegister, for
+// simplicity keep the function but always return false.
+// Used by the DFA in dfa_sparc.cpp.
+// Check for being able to use a V9 branch-on-register. Requires a
+// compare-vs-zero, equal/not-equal, of a value which was zero- or sign-
+// extended. Doesn't work following an integer ADD, for example, because of
+// overflow (-1 incremented yields 0 plus a carry in the high-order word). On
+// 32-bit V9 systems, interrupts currently blow away the high-order 32 bits and
+// replace them with zero, which could become sign-extension in a different OS
+// release. There's no obvious reason why an interrupt will ever fill these
+// bits with non-zero junk (the registers are reloaded with standard LD
+// instructions which either zero-fill or sign-fill).
+bool can_branch_register( Node *bol, Node *cmp ) {
+  return false;
+}
+
+bool use_block_zeroing(Node* count) {
+  // Use BIS for zeroing if count is not constant
+  // or it is >= BlockZeroingLowLimit.
+  // NOTE(review): BlockZeroingLowLimit is also the argument handed to
+  // find_intptr_t_con(); presumably that is the value it yields for a
+  // non-constant count - confirm against Node::find_intptr_t_con.
+  return UseBlockZeroing && (count->find_intptr_t_con(BlockZeroingLowLimit) >= BlockZeroingLowLimit);
+}
+
+// ****************************************************************************
+
+// REQUIRED FUNCTIONALITY
+
+// !!!!! Special hack to get all type of calls to specify the byte offset
+// from the start of the call to the point where the return address
+// will point.
+// The "return address" is the address of the call instruction, plus 8.
+
+int MachCallStaticJavaNode::ret_addr_offset() {
+  int offset = NativeCall::instruction_size; // call; delay slot
+  if (_method_handle_invoke)
+    offset += 4; // restore SP
+  return offset;
+}
+
+// Two shapes are sized here: a negative vtable index (asserted to be
+// invalid_vtable_index) counts a constant move plus the call; otherwise
+// the klass load, the vtable entry loads and the call are counted.
+int MachCallDynamicJavaNode::ret_addr_offset() {
+  int vtable_index = this->_vtable_index;
+  if (vtable_index < 0) {
+    // must be invalid_vtable_index, not nonvirtual_vtable_index
+    assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
+    return (NativeMovConstReg::instruction_size +
+           NativeCall::instruction_size); // sethi; setlo; call; delay slot
+  } else {
+    assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
+    int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index*vtableEntry::size_in_bytes();
+    int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
+    int klass_load_size;
+    if (UseCompressedClassPointers) {
+      assert(Universe::heap() != NULL, "java heap should be initialized");
+      klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord;
+    } else {
+      klass_load_size = 1*BytesPerInstWord;
+    }
+    // Two more instruction words are counted when v_off does not fit simm13.
+    if (Assembler::is_simm13(v_off)) {
+      return klass_load_size +
+             (2*BytesPerInstWord + // ld_ptr, ld_ptr
+             NativeCall::instruction_size); // call; delay slot
+    } else {
+      return klass_load_size +
+             (4*BytesPerInstWord + // set_hi, set, ld_ptr, ld_ptr
+             NativeCall::instruction_size); // call; delay slot
+    }
+  }
+}
+
+// Far targets use the NativeFarCall size, all others the NativeCall size.
+int MachCallRuntimeNode::ret_addr_offset() {
+  if (MacroAssembler::is_far_target(entry_point())) {
+    return NativeFarCall::instruction_size;
+  } else {
+    return NativeCall::instruction_size;
+  }
+}
+
+// Not implemented here: calls Unimplemented().
+int MachCallNativeNode::ret_addr_offset() {
+  Unimplemented();
+  return -1;
+}
+
+// emit an interrupt that is caught by the debugger (for debugging compiler)
+void emit_break(CodeBuffer &cbuf) {
+  C2_MacroAssembler _masm(&cbuf);
+  __ breakpoint_trap();
+}
+
+#ifndef PRODUCT
+void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
+  st->print("TA");
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  emit_break(cbuf);
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+// Traceable jump
+// jump_target is a register encoding; the delay slot holds a nop.
+void emit_jmpl(CodeBuffer &cbuf, int jump_target) {
+  C2_MacroAssembler _masm(&cbuf);
+  Register rdest = reg_to_register_object(jump_target);
+  __ JMP(rdest, 0);
+  __ delayed()->nop();
+}
+
+// Traceable jump and set exception pc
+// Same as emit_jmpl, except that the delay slot computes
+// Oissuing_pc = O7 + frame::pc_return_offset.
+void emit_jmpl_set_exception_pc(CodeBuffer &cbuf, int jump_target) {
+  C2_MacroAssembler _masm(&cbuf);
+  Register rdest = reg_to_register_object(jump_target);
+  __ JMP(rdest, 0);
+  __ delayed()->add(O7, frame::pc_return_offset, Oissuing_pc );
+}
+
+void emit_nop(CodeBuffer &cbuf) {
+  C2_MacroAssembler _masm(&cbuf);
+  __ nop();
+}
+
+void emit_illtrap(CodeBuffer &cbuf) {
+  C2_MacroAssembler _masm(&cbuf);
+  __ illtrap(0);
+}
+
+
+// Returns the _offset of atype after adding the displacement reported by
+// n->get_base_and_disp(). Asserts that this displacement equals disp32.
+// Must not be used for loadUB_rule nodes (asserted).
+intptr_t get_offset_from_base(const MachNode* n, const TypePtr* atype, int disp32) {
+  assert(n->rule() != loadUB_rule, "");
+
+  intptr_t offset = 0;
+  const TypePtr *adr_type = TYPE_PTR_SENTINAL; // Check for base==RegI, disp==immP
+  const Node* addr = n->get_base_and_disp(offset, adr_type);
+  assert(adr_type == (const TypePtr*)-1, "VerifyOops: no support for sparc operands with base==RegI, disp==immP");
+  assert(addr != NULL && addr != (Node*)-1, "invalid addr");
+  assert(addr->bottom_type()->isa_oopptr() == atype, "");
+  atype = atype->add_offset(offset);
+  assert(disp32 == offset, "wrong disp32");
+  return atype->_offset;
+}
+
+
+// Second way of computing the offset, starting from the address input
+// n->in(2). If that input is a Mach node whose ideal opcode is AddP, atype
+// is rebuilt from the AddP address input plus its offset (Type::OffsetBot
+// when the offset input is not a constant). disp32 is added to the result
+// unless the result is Type::OffsetBot.
+intptr_t get_offset_from_base_2(const MachNode* n, const TypePtr* atype, int disp32) {
+  assert(n->rule() != loadUB_rule, "");
+
+  intptr_t offset = 0;
+  Node* addr = n->in(2);
+  assert(addr->bottom_type()->isa_oopptr() == atype, "");
+  if (addr->is_Mach() && addr->as_Mach()->ideal_Opcode() == Op_AddP) {
+    Node* a = addr->in(2/*AddPNode::Address*/);
+    Node* o = addr->in(3/*AddPNode::Offset*/);
+    offset = o->is_Con() ? o->bottom_type()->is_intptr_t()->get_con() : Type::OffsetBot;
+    atype = a->bottom_type()->is_ptr()->add_offset(offset);
+    assert(atype->isa_oop_ptr(), "still an oop");
+  }
+  offset = atype->is_ptr()->_offset;
+  if (offset != Type::OffsetBot) offset += disp32;
+  return offset;
+}
+
+// width is in bytes; count*width must be 8 and width at most 4 (asserted),
+// so the result always fills all 64 bits.
+static inline jlong replicate_immI(int con, int count, int width) {
+  // Load a constant replicated "count" times with width "width"
+  assert(count*width == 8 && width <= 4, "sanity");
+  int bit_width = width * 8;
+  jlong val = con;
+  val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
+  for (int i = 0; i < count - 1; i++) {
+    val |= (val << bit_width);
+  }
+  return val;
+}
+
+static inline jlong replicate_immF(float con) {
+  // Replicate float con 2 times and pack into vector.
+  // The bits of con are read through an int* cast.
+  // NOTE(review): that is a type pun; it relies on the build not applying
+  // strict-aliasing optimizations - confirm.
+  int val = *((int*)&con);
+  jlong lval = val;
+  lval = (lval << 32) | (lval & 0xFFFFFFFFl);
+  return lval;
+}
+
+// In the emitters below each fNN parameter is shifted left by NN and OR-ed
+// into a 32-bit word, which is appended to the instruction section of cbuf.
+
+// Standard Sparc opcode form2 field breakdown
+static inline void emit2_19(CodeBuffer &cbuf, int f30, int f29, int f25, int f22, int f20, int f19, int f0 ) {
+  f0 &= (1<<19)-1; // Mask displacement to 19 bits
+  int op = (f30 << 30) |
+           (f29 << 29) |
+           (f25 << 25) |
+           (f22 << 22) |
+           (f20 << 20) |
+           (f19 << 19) |
+           (f0 << 0);
+  cbuf.insts()->emit_int32(op);
+}
+
+// Standard Sparc opcode form2 field breakdown
+static inline void emit2_22(CodeBuffer &cbuf, int f30, int f25, int f22, int f0 ) {
+  f0 >>= 10; // Drop 10 bits
+  f0 &= (1<<22)-1; // Mask displacement to 22 bits
+  int op = (f30 << 30) |
+           (f25 << 25) |
+           (f22 << 22) |
+           (f0 << 0);
+  cbuf.insts()->emit_int32(op);
+}
+
+// Standard Sparc opcode form3 field breakdown
+static inline void emit3(CodeBuffer &cbuf, int f30, int f25, int f19, int f14, int f5, int f0 ) {
+  int op = (f30 << 30) |
+           (f25 << 25) |
+           (f19 << 19) |
+           (f14 << 14) |
+           (f5 << 5) |
+           (f0 << 0);
+  cbuf.insts()->emit_int32(op);
+}
+
+// Standard Sparc opcode form3 field breakdown
+static inline void emit3_simm13(CodeBuffer &cbuf, int f30, int f25, int f19, int f14, int simm13 ) {
+  simm13 &= (1<<13)-1; // Mask to 13 bits
+  int op = (f30 << 30) |
+           (f25 << 25) |
+           (f19 << 19) |
+           (f14 << 14) |
+           (1 << 13) | // bit to indicate immediate-mode
+           (simm13<<0);
+  cbuf.insts()->emit_int32(op);
+}
+
+// Same as emit3_simm13 after the immediate has been cut down to 10 bits.
+static inline void emit3_simm10(CodeBuffer &cbuf, int f30, int f25, int f19, int f14, int simm10 ) {
+  simm10 &= (1<<10)-1; // Mask to 10 bits
+  emit3_simm13(cbuf,f30,f25,f19,f14,simm10);
+}
+
+#ifdef ASSERT
+// Helper function for VerifyOops in emit_form3_mem_reg
+// Issues two warnings and dumps n in between.
+void verify_oops_warning(const MachNode *n, int ideal_op, int mem_op) {
+  warning("VerifyOops encountered unexpected instruction:");
+  n->dump(2);
+  warning("Instruction has ideal_Opcode==Op_%s and op_ld==Op_%s \n", NodeClassNames[ideal_op], NodeClassNames[mem_op]);
+}
+#endif
+
+
+// Emit one load/store instruction word into cbuf.
+//   primary  - op3 value, placed at bit 19
+//   tertiary - -1 or REGP_OP (LoadP/StoreP encoding); only looked at by the
+//              ASSERT-only VerifyOops code
+//   src1_enc - base register encoding, placed at bit 14
+//   disp32   - displacement; STACK_BIAS is added when the base is SP or FP
+//   src2_enc - index register encoding, encoded only when the displacement
+//              is zero
+//   dst_enc  - data register encoding, placed at bit 25
+// For SP/FP bases a displacement that does not fit simm13 is first built in
+// O7 (plus the index register, if that is not G0) and O7 becomes the index.
+// ra is not referenced in this function.
+// NOTE(review): for other bases the displacement is cut to 13 bits without
+// a range check, and a non-zero displacement drops the index register.
+void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n, int primary, int tertiary,
+                        int src1_enc, int disp32, int src2_enc, int dst_enc) {
+
+#ifdef ASSERT
+  // The following code implements the +VerifyOops feature.
+  // It verifies oop values which are loaded into or stored out of
+  // the current method activation. +VerifyOops complements techniques
+  // like ScavengeALot, because it eagerly inspects oops in transit,
+  // as they enter or leave the stack, as opposed to ScavengeALot,
+  // which inspects oops "at rest", in the stack or heap, at safepoints.
+  // For this reason, +VerifyOops can sometimes detect bugs very close
+  // to their point of creation. It can also serve as a cross-check
+  // on the validity of oop maps, when used together with ScavengeALot.
+
+  // It would be good to verify oops at other points, especially
+  // when an oop is used as a base pointer for a load or store.
+  // This is presently difficult, because it is hard to know when
+  // a base address is biased or not. (If we had such information,
+  // it would be easy and useful to make a two-argument version of
+  // verify_oop which unbiases the base, and performs verification.)
+
+  assert((uint)tertiary == 0xFFFFFFFF || tertiary == REGP_OP, "valid tertiary");
+  bool is_verified_oop_base = false;
+  bool is_verified_oop_load = false;
+  bool is_verified_oop_store = false;
+  int tmp_enc = -1;
+  if (VerifyOops && src1_enc != R_SP_enc) {
+    // classify the op, mainly for an assert check
+    int st_op = 0, ld_op = 0;
+    switch (primary) {
+    case Assembler::stb_op3: st_op = Op_StoreB; break;
+    case Assembler::sth_op3: st_op = Op_StoreC; break;
+    case Assembler::stx_op3: // may become StoreP or stay StoreI or StoreD0
+    case Assembler::stw_op3: st_op = Op_StoreI; break;
+    case Assembler::std_op3: st_op = Op_StoreL; break;
+    case Assembler::stf_op3: st_op = Op_StoreF; break;
+    case Assembler::stdf_op3: st_op = Op_StoreD; break;
+
+    case Assembler::ldsb_op3: ld_op = Op_LoadB; break;
+    case Assembler::ldub_op3: ld_op = Op_LoadUB; break;
+    case Assembler::lduh_op3: ld_op = Op_LoadUS; break;
+    case Assembler::ldsh_op3: ld_op = Op_LoadS; break;
+    case Assembler::ldx_op3: // may become LoadP or stay LoadI
+    case Assembler::ldsw_op3: // may become LoadP or stay LoadI
+    case Assembler::lduw_op3: ld_op = Op_LoadI; break;
+    case Assembler::ldd_op3: ld_op = Op_LoadL; break;
+    case Assembler::ldf_op3: ld_op = Op_LoadF; break;
+    case Assembler::lddf_op3: ld_op = Op_LoadD; break;
+    case Assembler::prefetch_op3: ld_op = Op_LoadI; break;
+
+    default: ShouldNotReachHere();
+    }
+    if (tertiary == REGP_OP) {
+      if (st_op == Op_StoreI) st_op = Op_StoreP;
+      else if (ld_op == Op_LoadI) ld_op = Op_LoadP;
+      else ShouldNotReachHere();
+      if (st_op) {
+        // a store
+        // inputs are (0:control, 1:memory, 2:address, 3:value)
+        Node* n2 = n->in(3);
+        if (n2 != NULL) {
+          const Type* t = n2->bottom_type();
+          is_verified_oop_store = t->isa_oop_ptr() ? (t->is_ptr()->_offset==0) : false;
+        }
+      } else {
+        // a load
+        const Type* t = n->bottom_type();
+        is_verified_oop_load = t->isa_oop_ptr() ? (t->is_ptr()->_offset==0) : false;
+      }
+    }
+
+    if (ld_op) {
+      // a Load
+      // inputs are (0:control, 1:memory, 2:address)
+      if (!(n->ideal_Opcode()==ld_op) && // Following are special cases
+          !(n->ideal_Opcode()==Op_LoadPLocked && ld_op==Op_LoadP) &&
+          !(n->ideal_Opcode()==Op_LoadI && ld_op==Op_LoadF) &&
+          !(n->ideal_Opcode()==Op_LoadF && ld_op==Op_LoadI) &&
+          !(n->ideal_Opcode()==Op_LoadRange && ld_op==Op_LoadI) &&
+          !(n->ideal_Opcode()==Op_LoadKlass && ld_op==Op_LoadP) &&
+          !(n->ideal_Opcode()==Op_LoadL && ld_op==Op_LoadI) &&
+          !(n->ideal_Opcode()==Op_LoadL_unaligned && ld_op==Op_LoadI) &&
+          !(n->ideal_Opcode()==Op_LoadD_unaligned && ld_op==Op_LoadF) &&
+          !(n->ideal_Opcode()==Op_ConvI2F && ld_op==Op_LoadF) &&
+          !(n->ideal_Opcode()==Op_ConvI2D && ld_op==Op_LoadF) &&
+          !(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) &&
+          !(n->ideal_Opcode()==Op_LoadVector && ld_op==Op_LoadD) &&
+          !(n->rule() == loadUB_rule)) {
+        verify_oops_warning(n, n->ideal_Opcode(), ld_op);
+      }
+    } else if (st_op) {
+      // a Store
+      // inputs are (0:control, 1:memory, 2:address, 3:value)
+      if (!(n->ideal_Opcode()==st_op) && // Following are special cases
+          !(n->ideal_Opcode()==Op_StoreCM && st_op==Op_StoreB) &&
+          !(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) &&
+          !(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) &&
+          !(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) &&
+          !(n->ideal_Opcode()==Op_StoreVector && st_op==Op_StoreD) &&
+          !(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) {
+        verify_oops_warning(n, n->ideal_Opcode(), st_op);
+      }
+    }
+
+    if (src2_enc == R_G0_enc && n->rule() != loadUB_rule && n->ideal_Opcode() != Op_StoreCM ) {
+      Node* addr = n->in(2);
+      if (!(addr->is_Mach() && addr->as_Mach()->ideal_Opcode() == Op_AddP)) {
+        const TypeOopPtr* atype = addr->bottom_type()->isa_instptr(); // %%% oopptr?
+        if (atype != NULL) {
+          intptr_t offset = get_offset_from_base(n, atype, disp32);
+          intptr_t offset_2 = get_offset_from_base_2(n, atype, disp32);
+          // On a mismatch both helpers are simply called once more before
+          // the assert below fires.
+          if (offset != offset_2) {
+            get_offset_from_base(n, atype, disp32);
+            get_offset_from_base_2(n, atype, disp32);
+          }
+          assert(offset == offset_2, "different offsets");
+          if (offset == disp32) {
+            // we now know that src1 is a true oop pointer
+            is_verified_oop_base = true;
+            if (ld_op && src1_enc == dst_enc && ld_op != Op_LoadF && ld_op != Op_LoadD) {
+              if( primary == Assembler::ldd_op3 ) {
+                is_verified_oop_base = false; // Cannot 'ldd' into O7
+              } else {
+                tmp_enc = dst_enc;
+                dst_enc = R_O7_enc; // Load into O7; preserve source oop
+                assert(src1_enc != dst_enc, "");
+              }
+            }
+          }
+          if (st_op && (( offset == oopDesc::klass_offset_in_bytes())
+                        || offset == oopDesc::mark_offset_in_bytes())) {
+            // loading the mark should not be allowed either, but
+            // we don't check this since it conflicts with InlineObjectHash
+            // usage of LoadINode to get the mark. We could keep the
+            // check if we create a new LoadMarkNode
+            // but do not verify the object before its header is initialized
+            ShouldNotReachHere();
+          }
+        }
+      }
+    }
+  }
+#endif
+
+  uint instr = (Assembler::ldst_op << 30)
+             | (dst_enc << 25)
+             | (primary << 19)
+             | (src1_enc << 14);
+
+  uint index = src2_enc;
+  int disp = disp32;
+
+  if (src1_enc == R_SP_enc || src1_enc == R_FP_enc) {
+    disp += STACK_BIAS;
+    // Check that stack offset fits, load into O7 if not
+    if (!Assembler::is_simm13(disp)) {
+      C2_MacroAssembler _masm(&cbuf);
+      __ set(disp, O7);
+      if (index != R_G0_enc) {
+        __ add(O7, reg_to_register_object(index), O7);
+      }
+      index = R_O7_enc;
+      disp = 0;
+    }
+  }
+
+  if( disp == 0 ) {
+    // use reg-reg form
+    // bit 13 is already zero
+    instr |= index;
+  } else {
+    // use reg-imm form
+    instr |= 0x00002000; // set bit 13 to one
+    instr |= disp & 0x1FFF;
+  }
+
+  cbuf.insts()->emit_int32(instr);
+
+#ifdef ASSERT
+  if (VerifyOops) {
+    C2_MacroAssembler _masm(&cbuf);
+    if (is_verified_oop_base) {
+      __ verify_oop(reg_to_register_object(src1_enc));
+    }
+    if (is_verified_oop_store) {
+      __ verify_oop(reg_to_register_object(dst_enc));
+    }
+    // A load that was redirected into O7 above is moved to its real
+    // destination here.
+    if (tmp_enc != -1) {
+      __ mov(O7, reg_to_register_object(tmp_enc));
+    }
+    if (is_verified_oop_load) {
+      __ verify_oop(reg_to_register_object(dst_enc));
+    }
+  }
+#endif
+}
+
+// Emit a call to entry_point with relocation rspec. With preserve_g2 set, G2
+// is copied to L7 in the delay slot and copied back right after the call;
+// otherwise the delay slot holds a nop.
+void emit_call_reloc(CodeBuffer &cbuf, intptr_t entry_point, RelocationHolder const& rspec, bool preserve_g2 = false) {
+  // The method which records debug information at every safepoint
+  // expects the call to be the first instruction in the snippet as
+  // it creates a PcDesc structure which tracks the offset of a call
+  // from the start of the codeBlob. This offset is computed as
+  // code_end() - code_begin() of the code which has been emitted
+  // so far.
+  // In this particular case we have skirted around the problem by
+  // putting the "mov" instruction in the delay slot but the problem
+  // may bite us again at some other point and a cleaner/generic
+  // solution using relocations would be needed.
+  C2_MacroAssembler _masm(&cbuf);
+  __ set_inst_mark();
+
+  // We flush the current window just so that there is a valid stack copy
+  // the fact that the current window becomes active again instantly is
+  // not a problem there is nothing live in it.
+
+#ifdef ASSERT
+  // NOTE(review): startpos is not read anywhere in this function.
+  int startpos = __ offset();
+#endif /* ASSERT */
+
+  __ call((address)entry_point, rspec);
+
+  if (preserve_g2) __ delayed()->mov(G2, L7);
+  else __ delayed()->nop();
+
+  if (preserve_g2) __ mov(L7, G2);
+
+#ifdef ASSERT
+  if (preserve_g2 && VerifyOops) {
+    // Trash argument dump slots.
+    __ set(0xb0b8ac0db0b8ac0d, G1);
+    __ mov(G1, G5);
+    __ stx(G1, SP, STACK_BIAS + 0x80);
+    __ stx(G1, SP, STACK_BIAS + 0x88);
+    __ stx(G1, SP, STACK_BIAS + 0x90);
+    __ stx(G1, SP, STACK_BIAS + 0x98);
+    __ stx(G1, SP, STACK_BIAS + 0xA0);
+    __ stx(G1, SP, STACK_BIAS + 0xA8);
+  }
+#endif /*ASSERT*/
+}
+
+//=============================================================================
+// REQUIRED FUNCTIONALITY for encoding
+// Both functions have empty bodies.
+void emit_lo(CodeBuffer &cbuf, int val) { }
+void emit_hi(CodeBuffer &cbuf, int val) { }
+
+
+//=============================================================================
+const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();
+
+// Minus half the table size, or -4096 when that value does not fit a
+// 13-bit signed immediate.
+int ConstantTable::calculate_table_base_offset() const {
+  int offset = -(size() / 2);
+  if (!Assembler::is_simm13(offset)) {
+    offset = -4096;
+  }
+  return offset;
+}
+
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
+// Must never be called: requires_postalloc_expand() above returns false.
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+  ShouldNotReachHere();
+}
+
+// Loads the constant table base address (start of the consts section minus
+// the table base offset) into the register allocated to this node.
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  Compile* C = ra_->C;
+  ConstantTable& constant_table = C->output()->constant_table();
+  C2_MacroAssembler _masm(&cbuf);
+
+  Register r = as_Register(ra_->get_encode(this));
+  CodeSection* consts_section = __ code()->consts();
+  int consts_size = consts_section->align_at_start(consts_section->size());
+  assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size);
+
+  // Materialize the constant table base.
+  address baseaddr = consts_section->start() + -(constant_table.table_base_offset());
+  RelocationHolder rspec = internal_word_Relocation::spec(baseaddr);
+  AddressLiteral base(baseaddr, rspec);
+  __ set(base, r);
+}
+
+uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
+  return MacroAssembler::worst_case_insts_for_set() * BytesPerInstWord;
+}
+
+#ifndef PRODUCT
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  char reg[128];
+  ra_->dump_register(this, reg, sizeof(reg));
+  st->print("SET &constanttable,%s\t! constant table base", reg);
+}
+#endif
+
+
+//=============================================================================
+
+#ifndef PRODUCT
+// Prints a listing of the prologue: nops, thread verification, stack bang
+// and the SAVE (preceded by SETHI/ADD when -framesize does not fit simm13).
+void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  Compile* C = ra_->C;
+
+  for (int i = 0; i < OptoPrologueNops; i++) {
+    st->print_cr("NOP"); st->print("\t");
+  }
+
+  if( VerifyThread ) {
+    st->print_cr("Verify_Thread"); st->print("\t");
+  }
+
+  size_t framesize = C->output()->frame_size_in_bytes();
+  int bangsize = C->output()->bang_size_in_bytes();
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be careful, because
+  // some VM calls (such as call site linkage) can use several kilobytes of
+  // stack. But the stack safety zone should account for that.
+  // See bugs 4446381, 4468289, 4497237.
+  if (C->output()->need_stack_bang(bangsize)) {
+    st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t");
+  }
+
+  if (Assembler::is_simm13(-framesize)) {
+    st->print ("SAVE R_SP,-" SIZE_FORMAT ",R_SP",framesize);
+  } else {
+    st->print_cr("SETHI R_SP,hi%%(-" SIZE_FORMAT "),R_G3",framesize); st->print("\t");
+    st->print_cr("ADD R_G3,lo%%(-" SIZE_FORMAT "),R_G3",framesize); st->print("\t");
+    st->print ("SAVE R_SP,R_G3,R_SP");
+  }
+
+}
+#endif
+
+// Emits the prologue: optional nops, thread verification, optional stack
+// overflow check, then the SAVE that allocates the frame. Afterwards the
+// frame-complete offset is recorded and, if the compilation has a constant
+// base node, the constant table base offset is fixed.
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  C2_MacroAssembler _masm(&cbuf);
+
+  for (int i = 0; i < OptoPrologueNops; i++) {
+    __ nop();
+  }
+
+  __ verify_thread();
+
+  size_t framesize = C->output()->frame_size_in_bytes();
+  assert(framesize >= 16*wordSize, "must have room for reg. save area");
+  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
+  int bangsize = C->output()->bang_size_in_bytes();
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be careful, because
+  // some VM calls (such as call site linkage) can use several kilobytes of
+  // stack. But the stack safety zone should account for that.
+  // See bugs 4446381, 4468289, 4497237.
+  if (C->output()->need_stack_bang(bangsize)) {
+    __ generate_stack_overflow_check(bangsize);
+  }
+
+  if (Assembler::is_simm13(-framesize)) {
+    __ save(SP, -framesize, SP);
+  } else {
+    // -framesize is split into its bits above and below bit 10 and
+    // assembled in G3.
+    __ sethi(-framesize & ~0x3ff, G3);
+    __ add(G3, -framesize & 0x3ff, G3);
+    __ save(SP, G3, SP);
+  }
+  C->output()->set_frame_complete( __ offset() );
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    ConstantTable& constant_table = C->output()->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
+}
+
+uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+int MachPrologNode::reloc() const {
+  return 10; // a large enough number
+}
+
+//=============================================================================
+#ifndef PRODUCT
+// Prints a listing of the epilogue: the poll loads, NOP/RET and RESTORE.
+void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  Compile* C = ra_->C;
+
+  if(do_polling() && ra_->C->is_method_compilation()) {
+    st->print("LDX [R_G2 + #poll_offset],L0\t! Load local polling address\n\t");
+    st->print("LDX [L0],G0\t!Poll for Safepointing\n\t");
+  }
+
+  if(do_polling()) {
+    if (UseCBCond && !ra_->C->is_method_compilation()) {
+      st->print("NOP\n\t");
+    }
+    st->print("RET\n\t");
+  }
+
+  st->print("RESTORE");
+}
+#endif
+
+// Emits the epilogue: thread verification, optional reserved stack check,
+// optional safepoint poll, then either "ret" with the restore in its delay
+// slot (when do_polling() is true) or a plain restore.
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  C2_MacroAssembler _masm(&cbuf);
+  Compile* C = ra_->C;
+
+  __ verify_thread();
+
+  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
+    __ reserved_stack_check();
+  }
+
+  // If this does safepoint polling, then do it here
+  if(do_polling() && ra_->C->is_method_compilation()) {
+    __ ld_ptr(Address(G2_thread, JavaThread::polling_page_offset()), L0);
+    __ relocate(relocInfo::poll_return_type);
+    __ ld_ptr(L0, 0, G0);
+  }
+
+  // If this is a return, then stuff the restore in the delay slot
+  if(do_polling()) {
+    if (UseCBCond && !ra_->C->is_method_compilation()) {
+      // Insert extra padding for the case when the epilogue is preceded by
+      // a cbcond jump, which can't be followed by a CTI instruction
+      __ nop();
+    }
+    __ ret();
+    __ delayed()->restore();
+  } else {
+    __ restore();
+  }
+}
+
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+int MachEpilogNode::reloc() const {
+  return 16; // a large enough number
+}
+
+const Pipeline * MachEpilogNode::pipeline() const {
+  return MachNode::pipeline_class();
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float, rc_stack
+// (rc_bad is returned for an invalid OptoReg name)
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+static enum RC rc_class( OptoReg::Name reg ) {
+  if (!OptoReg::is_valid(reg)) return rc_bad;
+  if (OptoReg::is_stack(reg)) return rc_stack;
+  VMReg r = OptoReg::as_VMReg(reg);
+  if (r->is_Register()) return rc_int;
+  assert(r->is_FloatRegister(), "must be");
+  return rc_float;
+}
+
+#ifndef PRODUCT
+// printf-style output to st. When the stream position is already past
+// tab_size, a line break and a space are written first.
+ATTRIBUTE_PRINTF(2, 3)
+static void print_helper(outputStream* st, const char* format, ...) {
+  const int tab_size = 8;
+  if (st->position() > tab_size) {
+    st->cr();
+    st->sp();
+  }
+  va_list ap;
+  va_start(ap, format);
+  st->vprint(format, ap);
+  va_end(ap);
+}
+#endif // !PRODUCT
+
+// Spill load/store relative to SP: with a code buffer the instruction is
+// emitted through emit_form3_mem_reg; without one (non-PRODUCT builds only)
+// its text is printed to st.
+static void impl_helper(const MachNode* mach, CodeBuffer* cbuf, PhaseRegAlloc* ra, bool is_load, int offset, int reg, int opcode, const char *op_str, outputStream* st) {
+  if (cbuf) {
+    emit_form3_mem_reg(*cbuf, ra, mach, opcode, -1, R_SP_enc, offset, 0, Matcher::_regEncode[reg]);
+  }
+#ifndef PRODUCT
+  else {
+    if (is_load) {
+      print_helper(st, "%s [R_SP + #%d],R_%s\t! spill", op_str, offset, OptoReg::regname(reg));
+    } else {
+      print_helper(st, "%s R_%s,[R_SP + #%d]\t! spill", op_str, OptoReg::regname(reg), offset);
+    }
+  }
+#endif
+}
+
+// Register-to-register spill move: emitted through emit3 when a code buffer
+// is given, otherwise (non-PRODUCT builds only) printed to st.
+static void impl_mov_helper(CodeBuffer *cbuf, int src, int dst, int op1, int op2, const char *op_str, outputStream* st) {
+  if (cbuf) {
+    emit3(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src]);
+  }
+#ifndef PRODUCT
+  else {
+    print_helper(st, "%s R_%s,R_%s\t! spill", op_str, OptoReg::regname(src), OptoReg::regname(dst));
+  }
+#endif
+}
+
+static void mach_spill_copy_implementation_helper(const MachNode* mach,
+                                                  CodeBuffer *cbuf,
+                                                  PhaseRegAlloc *ra_,
+                                                  outputStream* st) {
+  // Get registers to move
+  OptoReg::Name src_second = ra_->get_reg_second(mach->in(1));
+  OptoReg::Name src_first = ra_->get_reg_first(mach->in(1));
+  OptoReg::Name dst_second = ra_->get_reg_second(mach);
+  OptoReg::Name dst_first = ra_->get_reg_first(mach);
+
+  enum RC src_second_rc = rc_class(src_second);
+  enum RC src_first_rc = rc_class(src_first);
+  enum RC dst_second_rc = rc_class(dst_second);
+  enum RC dst_first_rc = rc_class(dst_first);
+
+  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register");
+
+  if (src_first == dst_first && src_second == dst_second) {
+    return; // Self copy, no move
+  }
+
+  // --------------------------------------
+  // Check for mem-mem move. Load into unused float registers and fall into
+  // the float-store case.
+ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + // Further check for aligned-adjacent pair, so we can use a double load + if ((src_first&1) == 0 && src_first+1 == src_second) { + src_second = OptoReg::Name(R_F31_num); + src_second_rc = rc_float; + impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::lddf_op3, "LDDF", st); + } else { + impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::ldf_op3, "LDF ", st); + } + src_first = OptoReg::Name(R_F30_num); + src_first_rc = rc_float; + } + + if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { + int offset = ra_->reg2offset(src_second); + impl_helper(mach, cbuf, ra_, true, offset, R_F31_num, Assembler::ldf_op3, "LDF ", st); + src_second = OptoReg::Name(R_F31_num); + src_second_rc = rc_float; + } + + // -------------------------------------- + // Check for float->int copy; requires a trip through memory + if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS < 3) { + int offset = frame::register_save_words*wordSize; + if (cbuf) { + emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16); + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st); + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st); + emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16); + } +#ifndef PRODUCT + else { + print_helper(st, "SUB R_SP,16,R_SP"); + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st); + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st); + print_helper(st, "ADD R_SP,16,R_SP"); + } +#endif + } + + // Check for float->int copy on T4 + if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS >= 3) { + // Further check for aligned-adjacent pair, so we can use a double move + if ((src_first & 1) == 0 && src_first + 1 == src_second 
&& (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mdtox_opf, "MOVDTOX", st); + return; + } + impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mstouw_opf, "MOVSTOUW", st); + } + // Check for int->float copy on T4 + if (src_first_rc == rc_int && dst_first_rc == rc_float && UseVIS >= 3) { + // Further check for aligned-adjacent pair, so we can use a double move + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mxtod_opf, "MOVXTOD", st); + return; + } + impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mwtos_opf, "MOVWTOS", st); + } + + // -------------------------------------- + // In the 32-bit 1-reg-longs build ONLY, I see mis-aligned long destinations. + // In such cases, I have to do the big-endian swap. For aligned targets, the + // hardware does the flop for me. Doubles are always aligned, so no problem + // there. Misaligned sources only come from native-long-returns (handled + // special below). + + // -------------------------------------- + // Check for integer reg-reg copy + if (src_first_rc == rc_int && dst_first_rc == rc_int) { + // Else normal reg-reg copy + assert(src_second != dst_first, "smashed second before evacuating it"); + impl_mov_helper(cbuf, src_first, dst_first, Assembler::or_op3, 0, "MOV ", st); + assert((src_first & 1) == 0 && (dst_first & 1) == 0, "never move second-halves of int registers"); + // This moves an aligned adjacent pair. + // See if we are done. 
+ if (src_first + 1 == src_second && dst_first + 1 == dst_second) { + return; + } + } + + // Check for integer store + if (src_first_rc == rc_int && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(dst_first); + // Further check for aligned-adjacent pair, so we can use a double store + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stx_op3, "STX ", st); + return; + } + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stw_op3, "STW ", st); + } + + // Check for integer load + if (dst_first_rc == rc_int && src_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + // Further check for aligned-adjacent pair, so we can use a double load + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldx_op3, "LDX ", st); + return; + } + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st); + } + + // Check for float reg-reg copy + if (src_first_rc == rc_float && dst_first_rc == rc_float) { + // Further check for aligned-adjacent pair, so we can use a double move + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovd_opf, "FMOVD", st); + return; + } + impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovs_opf, "FMOVS", st); + } + + // Check for float store + if (src_first_rc == rc_float && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(dst_first); + // Further check for aligned-adjacent pair, so we can use a double store + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, ra_, 
false, offset, src_first, Assembler::stdf_op3, "STDF", st); + return; + } + impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st); + } + + // Check for float load + if (dst_first_rc == rc_float && src_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + // Further check for aligned-adjacent pair, so we can use a double load + if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lddf_op3, "LDDF", st); + return; + } + impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldf_op3, "LDF ", st); + } + + // -------------------------------------------------------------------- + // Check for hi bits still needing moving. Only happens for misaligned + // arguments to native calls. + if (src_second == dst_second) { + return; // Self copy; no move + } + assert(src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad"); + + Unimplemented(); +} + +uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, + PhaseRegAlloc *ra_, + bool do_size, + outputStream* st) const { + assert(!do_size, "not supported"); + mach_spill_copy_implementation_helper(this, cbuf, ra_, st); + return 0; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format(PhaseRegAlloc *, outputStream *st) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const { + 
C2_MacroAssembler _masm(&cbuf); + for (int i = 0; i < _count; i += 1) { + __ nop(); + } +} + +uint MachNopNode::size(PhaseRegAlloc *ra_) const { + return 4 * _count; +} + + +//============================================================================= +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("LEA [R_SP+#%d+BIAS],%s",offset,Matcher::regName[reg]); +} +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + C2_MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()) + STACK_BIAS; + int reg = ra_->get_encode(this); + + if (Assembler::is_simm13(offset)) { + __ add(SP, offset, reg_to_register_object(reg)); + } else { + __ set(offset, O7); + __ add(SP, O7, reg_to_register_object(reg)); + } +} + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_) + assert(ra_ == ra_->C->regalloc(), "sanity"); + return ra_->C->output()->scratch_emit_size(this); +} + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + st->print_cr("\nUEP:"); + if (UseCompressedClassPointers) { + assert(Universe::heap() != NULL, "java heap should be initialized"); + st->print_cr("\tLDUW [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! 
Inline cache check - compressed klass"); + if (CompressedKlassPointers::base() != 0) { + st->print_cr("\tSET CompressedKlassPointers::base,R_G6_heap_base"); + if (CompressedKlassPointers::shift() != 0) { + st->print_cr("\tSLL R_G5,CompressedKlassPointers::shift,R_G5"); + } + st->print_cr("\tADD R_G5,R_G6_heap_base,R_G5"); + st->print_cr("\tSET CompressedOops::ptrs_base,R_G6_heap_base"); + } else { + st->print_cr("\tSLL R_G5,CompressedKlassPointers::shift,R_G5"); + } + } else { + st->print_cr("\tLDX [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check"); + } + st->print_cr("\tCMP R_G5,R_G3" ); + st->print ("\tTne xcc,R_G0+ST_RESERVED_FOR_USER_0+2"); +} +#endif + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + C2_MacroAssembler _masm(&cbuf); + Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode()); + Register temp_reg = G3; + assert( G5_ic_reg != temp_reg, "conflicting registers" ); + + // Load klass from receiver + __ load_klass(O0, temp_reg); + // Compare against expected klass + __ cmp(temp_reg, G5_ic_reg); + // Branch to miss code, checks xcc or icc depending + __ trap(Assembler::notEqual, Assembler::ptr_cc, G0, ST_RESERVED_FOR_USER_0+2); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + +//============================================================================= + + +// Emit exception handler code. 
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + Register temp_reg = G3; + AddressLiteral exception_blob(OptoRuntime::exception_blob()->entry_point()); + C2_MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ JUMP(exception_blob, temp_reg, 0); // sethi;jmp + __ delayed()->nop(); + + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + + __ end_a_stub(); + + return offset; +} + +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Can't use any of the current frame's registers as we may have deopted + // at a poll and everything (including G3) can be live. + Register temp_reg = L0; + AddressLiteral deopt_blob(SharedRuntime::deopt_blob()->unpack()); + C2_MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + __ save_frame(0); + __ JUMP(deopt_blob, temp_reg, 0); // sethi;jmp + __ delayed()->restore(); + + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + + __ end_a_stub(); + return offset; + +} + +// Given a register encoding, produce a Integer Register object +static Register reg_to_register_object(int register_encoding) { + assert(L5->encoding() == R_L5_enc && G1->encoding() == R_G1_enc, "right coding"); + return as_Register(register_encoding); +} + +// Given a register encoding, produce a single-precision Float Register object +static FloatRegister reg_to_SingleFloatRegister_object(int register_encoding) { + assert(F5->encoding(FloatRegisterImpl::S) == R_F5_enc && F12->encoding(FloatRegisterImpl::S) == R_F12_enc, "right coding"); + return as_SingleFloatRegister(register_encoding); +} + +// Given a register 
encoding, produce a double-precision Float Register object +static FloatRegister reg_to_DoubleFloatRegister_object(int register_encoding) { + assert(F4->encoding(FloatRegisterImpl::D) == R_F4_enc, "right coding"); + assert(F32->encoding(FloatRegisterImpl::D) == R_D32_enc, "right coding"); + return as_DoubleFloatRegister(register_encoding); +} + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + case Op_PopCountI: + case Op_PopCountL: + if (!UsePopCountInstruction) + return false; + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + if (!VM_Version::supports_cx8()) + return false; + break; + } + + return true; // Per default match rules are supported. +} + +const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. +} + +const RegMask* Matcher::predicate_reg_mask(void) { + return NULL; +} + +const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { + return NULL; +} + +// Vector calling convention not yet implemented. 
+const bool Matcher::supports_vector_calling_convention(void) { + return false; +} + +OptoRegPair Matcher::vector_return_value(uint ideal_reg) { + Unimplemented(); + return OptoRegPair(0, 0); +} + +const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; +} + +#ifdef ASSERT +address last_rethrow = NULL; // debugging aid for Rethrow encoding +#endif + +// Vector width in bytes +const int Matcher::vector_width_in_bytes(BasicType bt) { + assert(MaxVectorSize == 8, ""); + return 8; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8, ""); + return Op_RegD; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + +// Is this branch offset short enough that a short branch can be used? +// +// NOTE: If the platform does not provide any short branch variants, then +// this method should return false for offset 0. +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to address of the branch. + // Don't need to adjust the offset. 
+ return UseCBCond && Assembler::is_simm12(offset); +} + +MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { + ShouldNotReachHere(); // generic vector operands not supported + return NULL; +} + +bool Matcher::is_generic_reg2reg_move(MachNode* m) { + ShouldNotReachHere(); // generic vector operands not supported + return false; +} + +bool Matcher::is_generic_vector(MachOper* opnd) { + ShouldNotReachHere(); // generic vector operands not supported + return false; +} + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg( int reg ) { + // Standard sparc 6 args in registers + if( reg == R_I0_num || + reg == R_I1_num || + reg == R_I2_num || + reg == R_I3_num || + reg == R_I4_num || + reg == R_I5_num ) return true; + // 64-bit builds can pass 64-bit pointers and longs in + // the high I registers + if( reg == R_I0H_num || + reg == R_I1H_num || + reg == R_I2H_num || + reg == R_I3H_num || + reg == R_I4H_num || + reg == R_I5H_num ) return true; + + if ((UseCompressedOops) && (reg == R_G6_num || reg == R_G6H_num)) { + return true; + } + + // A few float args in registers + if( reg >= R_F0_num && reg <= R_F7_num ) return true; + + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + // Use hardware SDIVX instruction when it is + // faster than a code which use multiply. 
+ return VM_Version::has_fast_idiv(); +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return L7_REGP_mask(); +} + +// Should the matcher clone input 'm' of node 'n'? +bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + return false; +} + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +%} + + +// The intptr_t operand types, defined by textual substitution. +// (Cf. opto/type.hpp. This lets us avoid many, many other ifdefs.) +#define immX immL +#define immX13 immL13 +#define immX13m7 immL13m7 +#define iRegX iRegL +#define g1RegX g1RegL + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes are parameterized macros used by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. 
REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// +// Instructions specify two basic values for encoding. Again, a function +// is available to check if the constant displacement is an oop. They use the +// ins_encode keyword to specify their encoding classes (which must be +// a sequence of enc_class names, and their parameters, specified in +// the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. 
+encode %{ + enc_class enc_untested %{ +#ifdef ASSERT + C2_MacroAssembler _masm(&cbuf); + __ untested("encoding"); +#endif + %} + + enc_class form3_mem_reg( memory mem, iRegI dst ) %{ + emit_form3_mem_reg(cbuf, ra_, this, $primary, $tertiary, + $mem$$base, $mem$$disp, $mem$$index, $dst$$reg); + %} + + enc_class simple_form3_mem_reg( memory mem, iRegI dst ) %{ + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, + $mem$$base, $mem$$disp, $mem$$index, $dst$$reg); + %} + + enc_class form3_mem_prefetch_read( memory mem ) %{ + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, + $mem$$base, $mem$$disp, $mem$$index, 0/*prefetch function many-reads*/); + %} + + enc_class form3_mem_prefetch_write( memory mem ) %{ + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, + $mem$$base, $mem$$disp, $mem$$index, 2/*prefetch function many-writes*/); + %} + + enc_class form3_mem_reg_long_unaligned_marshal( memory mem, iRegL reg ) %{ + assert(Assembler::is_simm13($mem$$disp ), "need disp and disp+4"); + assert(Assembler::is_simm13($mem$$disp+4), "need disp and disp+4"); + guarantee($mem$$index == R_G0_enc, "double index?"); + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, $mem$$base, $mem$$disp+4, R_G0_enc, R_O7_enc ); + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, $mem$$base, $mem$$disp, R_G0_enc, $reg$$reg ); + emit3_simm13( cbuf, Assembler::arith_op, $reg$$reg, Assembler::sllx_op3, $reg$$reg, 0x1020 ); + emit3( cbuf, Assembler::arith_op, $reg$$reg, Assembler::or_op3, $reg$$reg, 0, R_O7_enc ); + %} + + enc_class form3_mem_reg_double_unaligned( memory mem, RegD_low reg ) %{ + assert(Assembler::is_simm13($mem$$disp ), "need disp and disp+4"); + assert(Assembler::is_simm13($mem$$disp+4), "need disp and disp+4"); + guarantee($mem$$index == R_G0_enc, "double index?"); + // Load long with 2 instructions + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, $mem$$base, $mem$$disp, R_G0_enc, $reg$$reg+0 ); + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, $mem$$base, $mem$$disp+4, 
R_G0_enc, $reg$$reg+1 ); + %} + + //%%% form3_mem_plus_4_reg is a hack--get rid of it + enc_class form3_mem_plus_4_reg( memory mem, iRegI dst ) %{ + guarantee($mem$$disp, "cannot offset a reg-reg operand by 4"); + emit_form3_mem_reg(cbuf, ra_, this, $primary, -1, $mem$$base, $mem$$disp + 4, $mem$$index, $dst$$reg); + %} + + enc_class form3_g0_rs2_rd_move( iRegI rs2, iRegI rd ) %{ + // Encode a reg-reg copy. If it is useless, then empty encoding. + if( $rs2$$reg != $rd$$reg ) + emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, 0, $rs2$$reg ); + %} + + // Target lo half of long + enc_class form3_g0_rs2_rd_move_lo( iRegI rs2, iRegL rd ) %{ + // Encode a reg-reg copy. If it is useless, then empty encoding. + if( $rs2$$reg != LONG_LO_REG($rd$$reg) ) + emit3( cbuf, Assembler::arith_op, LONG_LO_REG($rd$$reg), Assembler::or_op3, 0, 0, $rs2$$reg ); + %} + + // Source lo half of long + enc_class form3_g0_rs2_rd_move_lo2( iRegL rs2, iRegI rd ) %{ + // Encode a reg-reg copy. If it is useless, then empty encoding. + if( LONG_LO_REG($rs2$$reg) != $rd$$reg ) + emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, 0, LONG_LO_REG($rs2$$reg) ); + %} + + // Target hi half of long + enc_class form3_rs1_rd_copysign_hi( iRegI rs1, iRegL rd ) %{ + emit3_simm13( cbuf, Assembler::arith_op, $rd$$reg, Assembler::sra_op3, $rs1$$reg, 31 ); + %} + + // Source lo half of long, and leave it sign extended. + enc_class form3_rs1_rd_signextend_lo1( iRegL rs1, iRegI rd ) %{ + // Sign extend low half + emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::sra_op3, $rs1$$reg, 0, 0 ); + %} + + // Source hi half of long, and leave it sign extended. + enc_class form3_rs1_rd_copy_hi1( iRegL rs1, iRegI rd ) %{ + // Shift high half to low half + emit3_simm13( cbuf, Assembler::arith_op, $rd$$reg, Assembler::srlx_op3, $rs1$$reg, 32 ); + %} + + // Source hi half of long + enc_class form3_g0_rs2_rd_move_hi2( iRegL rs2, iRegI rd ) %{ + // Encode a reg-reg copy. 
If it is useless, then empty encoding. + if( LONG_HI_REG($rs2$$reg) != $rd$$reg ) + emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, 0, LONG_HI_REG($rs2$$reg) ); + %} + + enc_class form3_rs1_rs2_rd( iRegI rs1, iRegI rs2, iRegI rd ) %{ + emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, 0, $rs2$$reg ); + %} + + enc_class enc_to_bool( iRegI src, iRegI dst ) %{ + emit3 ( cbuf, Assembler::arith_op, 0, Assembler::subcc_op3, 0, 0, $src$$reg ); + emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::addc_op3 , 0, 0 ); + %} + + enc_class enc_ltmask( iRegI p, iRegI q, iRegI dst ) %{ + emit3 ( cbuf, Assembler::arith_op, 0, Assembler::subcc_op3, $p$$reg, 0, $q$$reg ); + // clear if nothing else is happening + emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, 0 ); + // blt,a,pn done + emit2_19 ( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::less, Assembler::bp_op2, Assembler::icc, 0/*predict not taken*/, 2 ); + // mov dst,-1 in delay slot + emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, -1 ); + %} + + enc_class form3_rs1_imm5_rd( iRegI rs1, immU5 imm5, iRegI rd ) %{ + emit3_simm13( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $imm5$$constant & 0x1F ); + %} + + enc_class form3_sd_rs1_imm6_rd( iRegL rs1, immU6 imm6, iRegL rd ) %{ + emit3_simm13( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, ($imm6$$constant & 0x3F) | 0x1000 ); + %} + + enc_class form3_sd_rs1_rs2_rd( iRegL rs1, iRegI rs2, iRegL rd ) %{ + emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, 0x80, $rs2$$reg ); + %} + + enc_class form3_rs1_simm13_rd( iRegI rs1, immI13 simm13, iRegI rd ) %{ + emit3_simm13( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $simm13$$constant ); + %} + + enc_class move_return_pc_to_o1() %{ + emit3_simm13( cbuf, Assembler::arith_op, R_O1_enc, Assembler::add_op3, R_O7_enc, frame::pc_return_offset ); + %} + + /* %%% merge with enc_to_bool */ + enc_class enc_convP2B( iRegI dst, iRegP src ) %{ + 
C2_MacroAssembler _masm(&cbuf); + + Register src_reg = reg_to_register_object($src$$reg); + Register dst_reg = reg_to_register_object($dst$$reg); + __ movr(Assembler::rc_nz, src_reg, 1, dst_reg); + %} + + enc_class enc_cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, iRegI tmp ) %{ + // (Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))) + C2_MacroAssembler _masm(&cbuf); + + Register p_reg = reg_to_register_object($p$$reg); + Register q_reg = reg_to_register_object($q$$reg); + Register y_reg = reg_to_register_object($y$$reg); + Register tmp_reg = reg_to_register_object($tmp$$reg); + + __ subcc( p_reg, q_reg, p_reg ); + __ add ( p_reg, y_reg, tmp_reg ); + __ movcc( Assembler::less, false, Assembler::icc, tmp_reg, p_reg ); + %} + + enc_class form_d2i_helper(regD src, regF dst) %{ + // fcmp %fcc0,$src,$src + emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmpd_opf, $src$$reg ); + // branch %fcc0 not-nan, predict taken + emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 ); + // fdtoi $src,$dst + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fdtoi_opf, $src$$reg ); + // fitos $dst,$dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fitos_opf, $dst$$reg ); + // clear $dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubs_opf, $dst$$reg ); + // carry on here... 
+ %} + + enc_class form_d2l_helper(regD src, regD dst) %{ + // fcmp %fcc0,$src,$src check for NAN + emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmpd_opf, $src$$reg ); + // branch %fcc0 not-nan, predict taken + emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 ); + // fdtox $src,$dst convert in delay slot + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fdtox_opf, $src$$reg ); + // fxtod $dst,$dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fxtod_opf, $dst$$reg ); + // clear $dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubd_opf, $dst$$reg ); + // carry on here... + %} + + enc_class form_f2i_helper(regF src, regF dst) %{ + // fcmps %fcc0,$src,$src + emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmps_opf, $src$$reg ); + // branch %fcc0 not-nan, predict taken + emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 ); + // fstoi $src,$dst + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fstoi_opf, $src$$reg ); + // fitos $dst,$dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fitos_opf, $dst$$reg ); + // clear $dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubs_opf, $dst$$reg ); + // carry on here... 
+ %} + + enc_class form_f2l_helper(regF src, regD dst) %{ + // fcmps %fcc0,$src,$src + emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmps_opf, $src$$reg ); + // branch %fcc0 not-nan, predict taken + emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 ); + // fstox $src,$dst + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fstox_opf, $src$$reg ); + // fxtod $dst,$dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, 0, Assembler::fxtod_opf, $dst$$reg ); + // clear $dst (if nan) + emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubd_opf, $dst$$reg ); + // carry on here... + %} + + enc_class form3_opf_rs2F_rdF(regF rs2, regF rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %} + enc_class form3_opf_rs2F_rdD(regF rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %} + enc_class form3_opf_rs2D_rdF(regD rs2, regF rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %} + enc_class form3_opf_rs2D_rdD(regD rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %} + + enc_class form3_opf_rs2D_lo_rdF(regD rs2, regF rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg+1); %} + + enc_class form3_opf_rs2D_hi_rdD_hi(regD rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %} + enc_class form3_opf_rs2D_lo_rdD_lo(regD rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg+1,$primary,0,$tertiary,$rs2$$reg+1); %} + + enc_class form3_opf_rs1F_rs2F_rdF( regF rs1, regF rs2, regF rd ) %{ + emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg ); + %} + + enc_class form3_opf_rs1D_rs2D_rdD( regD rs1, regD rs2, regD rd ) %{ + emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg ); 
+ %} + + enc_class form3_opf_rs1F_rs2F_fcc( regF rs1, regF rs2, flagsRegF fcc ) %{ + emit3( cbuf, $secondary, $fcc$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg ); + %} + + enc_class form3_opf_rs1D_rs2D_fcc( regD rs1, regD rs2, flagsRegF fcc ) %{ + emit3( cbuf, $secondary, $fcc$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg ); + %} + + enc_class form3_convI2F(regF rs2, regF rd) %{ + emit3(cbuf,Assembler::arith_op,$rd$$reg,Assembler::fpop1_op3,0,$secondary,$rs2$$reg); + %} + + // Encoding class for traceable jumps + enc_class form_jmpl(g3RegP dest) %{ + emit_jmpl(cbuf, $dest$$reg); + %} + + enc_class form_jmpl_set_exception_pc(g1RegP dest) %{ + emit_jmpl_set_exception_pc(cbuf, $dest$$reg); + %} + + enc_class form2_nop() %{ + emit_nop(cbuf); + %} + + enc_class form2_illtrap() %{ + emit_illtrap(cbuf); + %} + + + // Compare longs and convert into -1, 0, 1. + enc_class cmpl_flag( iRegL src1, iRegL src2, iRegI dst ) %{ + // CMP $src1,$src2 + emit3( cbuf, Assembler::arith_op, 0, Assembler::subcc_op3, $src1$$reg, 0, $src2$$reg ); + // blt,a,pn done + emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::less , Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 5 ); + // mov dst,-1 in delay slot + emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, -1 ); + // bgt,a,pn done + emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::greater, Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 3 ); + // mov dst,1 in delay slot + emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, 1 ); + // CLR $dst + emit3( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3 , 0, 0, 0 ); + %} + + enc_class enc_PartialSubtypeCheck() %{ + C2_MacroAssembler _masm(&cbuf); + __ call(StubRoutines::Sparc::partial_subtype_check(), relocInfo::runtime_call_type); + __ delayed()->nop(); + %} + + enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{ + C2_MacroAssembler _masm(&cbuf); + Label* L = $labl$$label; + 
Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + + enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{ + C2_MacroAssembler _masm(&cbuf); + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, predict_taken, as_Register($op1$$reg), *L); + __ delayed()->nop(); + %} + + enc_class enc_cmov_reg( cmpOp cmp, iRegI dst, iRegI src, immI pcc) %{ + int op = (Assembler::arith_op << 30) | + ($dst$$reg << 25) | + (Assembler::movcc_op3 << 19) | + (1 << 18) | // cc2 bit for 'icc' + ($cmp$$cmpcode << 14) | + (0 << 13) | // select register move + ($pcc$$constant << 11) | // cc1, cc0 bits for 'icc' or 'xcc' + ($src$$reg << 0); + cbuf.insts()->emit_int32(op); + %} + + enc_class enc_cmov_imm( cmpOp cmp, iRegI dst, immI11 src, immI pcc ) %{ + int simm11 = $src$$constant & ((1<<11)-1); // Mask to 11 bits + int op = (Assembler::arith_op << 30) | + ($dst$$reg << 25) | + (Assembler::movcc_op3 << 19) | + (1 << 18) | // cc2 bit for 'icc' + ($cmp$$cmpcode << 14) | + (1 << 13) | // select immediate move + ($pcc$$constant << 11) | // cc1, cc0 bits for 'icc' or 'xcc' + (simm11 << 0); + cbuf.insts()->emit_int32(op); + %} + + enc_class enc_cmov_reg_f( cmpOpF cmp, iRegI dst, iRegI src, flagsRegF fcc ) %{ + int op = (Assembler::arith_op << 30) | + ($dst$$reg << 25) | + (Assembler::movcc_op3 << 19) | + (0 << 18) | // cc2 bit for 'fccX' + ($cmp$$cmpcode << 14) | + (0 << 13) | // select register move + ($fcc$$reg << 11) | // cc1, cc0 bits for fcc0-fcc3 + ($src$$reg << 0); + cbuf.insts()->emit_int32(op); + %} + + enc_class enc_cmov_imm_f( cmpOp cmp, iRegI dst, immI11 src, flagsRegF fcc ) %{ + int simm11 = $src$$constant & ((1<<11)-1); // Mask to 11 bits + int op = (Assembler::arith_op << 30) | + ($dst$$reg 
<< 25) | + (Assembler::movcc_op3 << 19) | + (0 << 18) | // cc2 bit for 'fccX' + ($cmp$$cmpcode << 14) | + (1 << 13) | // select immediate move + ($fcc$$reg << 11) | // cc1, cc0 bits for fcc0-fcc3 + (simm11 << 0); + cbuf.insts()->emit_int32(op); + %} + + enc_class enc_cmovf_reg( cmpOp cmp, regD dst, regD src, immI pcc ) %{ + int op = (Assembler::arith_op << 30) | + ($dst$$reg << 25) | + (Assembler::fpop2_op3 << 19) | + (0 << 18) | + ($cmp$$cmpcode << 14) | + (1 << 13) | // cc2 bit: integer condition codes ('icc' or 'xcc') + ($pcc$$constant << 11) | // cc1-cc0 bits for 'icc' or 'xcc' + ($primary << 5) | // select single, double or quad + ($src$$reg << 0); + cbuf.insts()->emit_int32(op); + %} + + enc_class enc_cmovff_reg( cmpOpF cmp, flagsRegF fcc, regD dst, regD src ) %{ + int op = (Assembler::arith_op << 30) | + ($dst$$reg << 25) | + (Assembler::fpop2_op3 << 19) | + (0 << 18) | + ($cmp$$cmpcode << 14) | + ($fcc$$reg << 11) | // cc2-cc0 bits for 'fccX' + ($primary << 5) | // select single, double or quad + ($src$$reg << 0); + cbuf.insts()->emit_int32(op); + %} + + // Used by the MIN/MAX encodings. Same as a CMOV, but + // the condition comes from opcode-field instead of an argument. 
+ enc_class enc_cmov_reg_minmax( iRegI dst, iRegI src ) %{ + int op = (Assembler::arith_op << 30) | + ($dst$$reg << 25) | + (Assembler::movcc_op3 << 19) | + (1 << 18) | // cc2 bit for 'icc' + ($primary << 14) | + (0 << 13) | // select register move + (0 << 11) | // cc1, cc0 bits for 'icc' + ($src$$reg << 0); + cbuf.insts()->emit_int32(op); + %} + + enc_class enc_cmov_reg_minmax_long( iRegL dst, iRegL src ) %{ + int op = (Assembler::arith_op << 30) | + ($dst$$reg << 25) | + (Assembler::movcc_op3 << 19) | + (6 << 16) | // cc2 bit for 'xcc'; NOTE(review): 6 << 16 sets bits 18 and 17, and bit 17 may overlap the condition field placed at << 14, while the cc1/cc0 field below stays 0 as in the 'icc' variant -- confirm this really encodes 'xcc' + ($primary << 14) | + (0 << 13) | // select register move + (0 << 11) | // cc1, cc0 bits for 'icc' + ($src$$reg << 0); + cbuf.insts()->emit_int32(op); + %} + + enc_class Set13( immI13 src, iRegI rd ) %{ + emit3_simm13( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, $src$$constant ); + %} + + enc_class SetHi22( immI src, iRegI rd ) %{ + emit2_22( cbuf, Assembler::branch_op, $rd$$reg, Assembler::sethi_op2, $src$$constant ); + %} + + enc_class Set32( immI src, iRegI rd ) %{ + C2_MacroAssembler _masm(&cbuf); + __ set($src$$constant, reg_to_register_object($rd$$reg)); + %} + + enc_class call_epilog %{ + if( VerifyStackAtCalls ) { + C2_MacroAssembler _masm(&cbuf); + int framesize = ra_->C->output()->frame_size_in_bytes(); + Register temp_reg = G3; + __ add(SP, framesize, temp_reg); + __ cmp(temp_reg, FP); + __ breakpoint_trap(Assembler::notEqual, Assembler::ptr_cc); + } + %} + + // Long values come back from native calls in O0:O1 in the 32-bit VM, copy the value + // to G1 so the register allocator will not have to deal with the misaligned register + // pair. + enc_class adjust_long_from_native_call %{ + %} + + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime + // CALL directly to the runtime + // The user of this is responsible for ensuring that R_L7 is empty (killed). 
+ emit_call_reloc(cbuf, $meth$$method, runtime_call_Relocation::spec(), /*preserve_g2=*/true); + %} + + enc_class preserve_SP %{ + C2_MacroAssembler _masm(&cbuf); + __ mov(SP, L7_mh_SP_save); + %} + + enc_class restore_SP %{ + C2_MacroAssembler _masm(&cbuf); + __ mov(L7_mh_SP_save, SP); + %} + + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + if (!_method) { + emit_call_reloc(cbuf, $meth$$method, runtime_call_Relocation::spec()); + } else { + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); + emit_call_reloc(cbuf, $meth$$method, rspec); + + // Emit stub for static call. + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL + C2_MacroAssembler _masm(&cbuf); + __ set_inst_mark(); + int vtable_index = this->_vtable_index; + // MachCallDynamicJavaNode::ret_addr_offset uses this same test + if (vtable_index < 0) { + // must be invalid_vtable_index, not nonvirtual_vtable_index + assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value"); + Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode()); + assert(G5_ic_reg == G5_inline_cache_reg, "G5_inline_cache_reg used in assemble_ic_buffer_code()"); + assert(G5_ic_reg == G5_megamorphic_method, "G5_megamorphic_method used in megamorphic call stub"); + __ ic_call((address)$meth$$method, /*emit_delay=*/true, resolved_method_index(cbuf)); + } else { + assert(!UseInlineCaches, "expect vtable calls only if not using ICs"); + // Just go thru the vtable + // get receiver klass (receiver already checked for non-null) + // If we end up going thru a c2i 
adapter interpreter expects method in G5 + int off = __ offset(); + __ load_klass(O0, G3_scratch); + int klass_load_size; + if (UseCompressedClassPointers) { + assert(Universe::heap() != NULL, "java heap should be initialized"); + klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord; + } else { + klass_load_size = 1*BytesPerInstWord; + } + int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index*vtableEntry::size_in_bytes(); + int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); + if (Assembler::is_simm13(v_off)) { + __ ld_ptr(G3, v_off, G5_method); + } else { + // Generate 2 instructions + __ Assembler::sethi(v_off & ~0x3ff, G5_method); + __ or3(G5_method, v_off & 0x3ff, G5_method); + // ld_ptr, set_hi, set + assert(__ offset() - off == klass_load_size + 2*BytesPerInstWord, + "Unexpected instruction size(s)"); + __ ld_ptr(G3, G5_method, G5_method); + } + // NOTE: for vtable dispatches, the vtable entry will never be null. + // However it may very well end up in handle_wrong_method if the + // method is abstract for the particular class. + __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3_scratch); + // jump to target (either compiled code or c2iadapter) + __ jmpl(G3_scratch, G0, O7); + __ delayed()->nop(); + } + %} + + enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL + C2_MacroAssembler _masm(&cbuf); + + Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode()); + Register temp_reg = G3; // caller must kill G3! We cannot reuse G5_ic_reg here because + // we might be calling a C2I adapter which needs it. 
+ + assert(temp_reg != G5_ic_reg, "conflicting registers"); + // Load nmethod + __ ld_ptr(G5_ic_reg, in_bytes(Method::from_compiled_offset()), temp_reg); + + // CALL to compiled java, indirect the contents of G3 + __ set_inst_mark(); + __ callr(temp_reg, G0); + __ delayed()->nop(); + %} + +enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{ + C2_MacroAssembler _masm(&cbuf); + Register Rdividend = reg_to_register_object($src1$$reg); + Register Rdivisor = reg_to_register_object($src2$$reg); + Register Rresult = reg_to_register_object($dst$$reg); + + __ sra(Rdivisor, 0, Rdivisor); + __ sra(Rdividend, 0, Rdividend); + __ sdivx(Rdividend, Rdivisor, Rresult); +%} + +enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{ + C2_MacroAssembler _masm(&cbuf); + + Register Rdividend = reg_to_register_object($src1$$reg); + int divisor = $imm$$constant; + Register Rresult = reg_to_register_object($dst$$reg); + + __ sra(Rdividend, 0, Rdividend); + __ sdivx(Rdividend, divisor, Rresult); +%} + +enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{ + C2_MacroAssembler _masm(&cbuf); + Register Rsrc1 = reg_to_register_object($src1$$reg); + Register Rsrc2 = reg_to_register_object($src2$$reg); + Register Rdst = reg_to_register_object($dst$$reg); + + __ sra( Rsrc1, 0, Rsrc1 ); + __ sra( Rsrc2, 0, Rsrc2 ); + __ mulx( Rsrc1, Rsrc2, Rdst ); + __ srlx( Rdst, 32, Rdst ); +%} + +enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch) %{ + C2_MacroAssembler _masm(&cbuf); + Register Rdividend = reg_to_register_object($src1$$reg); + Register Rdivisor = reg_to_register_object($src2$$reg); + Register Rresult = reg_to_register_object($dst$$reg); + Register Rscratch = reg_to_register_object($scratch$$reg); + + assert(Rdividend != Rscratch, ""); + assert(Rdivisor != Rscratch, ""); + + __ sra(Rdividend, 0, Rdividend); + __ sra(Rdivisor, 0, Rdivisor); + __ sdivx(Rdividend, Rdivisor, Rscratch); + __ mulx(Rscratch, Rdivisor, Rscratch); + 
__ sub(Rdividend, Rscratch, Rresult); +%} + +enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{ + C2_MacroAssembler _masm(&cbuf); + + Register Rdividend = reg_to_register_object($src1$$reg); + int divisor = $imm$$constant; + Register Rresult = reg_to_register_object($dst$$reg); + Register Rscratch = reg_to_register_object($scratch$$reg); + + assert(Rdividend != Rscratch, ""); + + __ sra(Rdividend, 0, Rdividend); + __ sdivx(Rdividend, divisor, Rscratch); + __ mulx(Rscratch, divisor, Rscratch); + __ sub(Rdividend, Rscratch, Rresult); +%} + +enc_class fabss (sflt_reg dst, sflt_reg src) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg); + FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg); + + __ fabs(FloatRegisterImpl::S, Fsrc, Fdst); +%} + +enc_class fabsd (dflt_reg dst, dflt_reg src) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg); + + __ fabs(FloatRegisterImpl::D, Fsrc, Fdst); +%} + +enc_class fnegd (dflt_reg dst, dflt_reg src) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg); + + __ fneg(FloatRegisterImpl::D, Fsrc, Fdst); +%} + +enc_class fsqrts (sflt_reg dst, sflt_reg src) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg); + FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg); + + __ fsqrt(FloatRegisterImpl::S, Fsrc, Fdst); +%} + +enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg); + + __ fsqrt(FloatRegisterImpl::D, Fsrc, Fdst); +%} + + +enc_class fmadds (sflt_reg dst, 
sflt_reg a, sflt_reg b, sflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg); + FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg); + + __ fmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd); +%} + +enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg); + FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg); + + __ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd); +%} + +enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg); + FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg); + + __ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd); +%} + +enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg); + FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg); + + __ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd); +%} + +enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg); + FloatRegister Frb = 
reg_to_SingleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg); + + __ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd); +%} + +enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg); + FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg); + + __ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd); +%} + +enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg); + FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg); + + __ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd); +%} + +enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg); + FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg); + FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg); + + __ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd); +%} + + +enc_class fmovs (dflt_reg dst, dflt_reg src) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg); + FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg); + + __ fmov(FloatRegisterImpl::S, Fsrc, Fdst); +%} + +enc_class fmovd (dflt_reg dst, dflt_reg src) %{ + C2_MacroAssembler _masm(&cbuf); + + FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg); + FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg); + + __ 
fmov(FloatRegisterImpl::D, Fsrc, Fdst); +%} + +enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ + C2_MacroAssembler _masm(&cbuf); + + Register Roop = reg_to_register_object($oop$$reg); + Register Rbox = reg_to_register_object($box$$reg); + Register Rscratch = reg_to_register_object($scratch$$reg); + Register Rmark = reg_to_register_object($scratch2$$reg); + + assert(Roop != Rscratch, ""); + assert(Roop != Rmark, ""); + assert(Rbox != Rscratch, ""); + assert(Rbox != Rmark, ""); + + __ compiler_lock_object(Roop, Rmark, Rbox, Rscratch, _counters, UseBiasedLocking && !UseOptoBiasInlining); +%} + +enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ + C2_MacroAssembler _masm(&cbuf); + + Register Roop = reg_to_register_object($oop$$reg); + Register Rbox = reg_to_register_object($box$$reg); + Register Rscratch = reg_to_register_object($scratch$$reg); + Register Rmark = reg_to_register_object($scratch2$$reg); + + assert(Roop != Rscratch, ""); + assert(Roop != Rmark, ""); + assert(Rbox != Rscratch, ""); + assert(Rbox != Rmark, ""); + + __ compiler_unlock_object(Roop, Rmark, Rbox, Rscratch, UseBiasedLocking && !UseOptoBiasInlining); + %} + + enc_class enc_cas( iRegP mem, iRegP old, iRegP new ) %{ + C2_MacroAssembler _masm(&cbuf); + Register Rmem = reg_to_register_object($mem$$reg); + Register Rold = reg_to_register_object($old$$reg); + Register Rnew = reg_to_register_object($new$$reg); + + __ cas_ptr(Rmem, Rold, Rnew); // Swap(*Rmem,Rnew) if *Rmem == Rold + __ cmp( Rold, Rnew ); + %} + + enc_class enc_casx( iRegP mem, iRegL old, iRegL new) %{ + Register Rmem = reg_to_register_object($mem$$reg); + Register Rold = reg_to_register_object($old$$reg); + Register Rnew = reg_to_register_object($new$$reg); + + C2_MacroAssembler _masm(&cbuf); + __ mov(Rnew, O7); + __ casx(Rmem, Rold, O7); + __ cmp( Rold, O7 ); + %} + + // raw int cas, used for compareAndSwap + enc_class enc_casi( iRegP mem, iRegL old, iRegL new) %{ + Register Rmem 
= reg_to_register_object($mem$$reg); + Register Rold = reg_to_register_object($old$$reg); + Register Rnew = reg_to_register_object($new$$reg); + + C2_MacroAssembler _masm(&cbuf); + __ mov(Rnew, O7); + __ cas(Rmem, Rold, O7); + __ cmp( Rold, O7 ); + %} + + // raw int cas without using tmp register for compareAndExchange + enc_class enc_casi_exch( iRegP mem, iRegL old, iRegL new) %{ + Register Rmem = reg_to_register_object($mem$$reg); + Register Rold = reg_to_register_object($old$$reg); + Register Rnew = reg_to_register_object($new$$reg); + + C2_MacroAssembler _masm(&cbuf); + __ cas(Rmem, Rold, Rnew); + %} + + // 64-bit cas without using tmp register for compareAndExchange + enc_class enc_casx_exch( iRegP mem, iRegL old, iRegL new) %{ + Register Rmem = reg_to_register_object($mem$$reg); + Register Rold = reg_to_register_object($old$$reg); + Register Rnew = reg_to_register_object($new$$reg); + + C2_MacroAssembler _masm(&cbuf); + __ casx(Rmem, Rold, Rnew); + %} + + enc_class enc_lflags_ne_to_boolean( iRegI res ) %{ + Register Rres = reg_to_register_object($res$$reg); + + C2_MacroAssembler _masm(&cbuf); + __ mov(1, Rres); + __ movcc( Assembler::notEqual, false, Assembler::xcc, G0, Rres ); + %} + + enc_class enc_iflags_ne_to_boolean( iRegI res ) %{ + Register Rres = reg_to_register_object($res$$reg); + + C2_MacroAssembler _masm(&cbuf); + __ mov(1, Rres); + __ movcc( Assembler::notEqual, false, Assembler::icc, G0, Rres ); + %} + + enc_class floating_cmp ( iRegP dst, regF src1, regF src2 ) %{ + C2_MacroAssembler _masm(&cbuf); + Register Rdst = reg_to_register_object($dst$$reg); + FloatRegister Fsrc1 = $primary ? reg_to_SingleFloatRegister_object($src1$$reg) + : reg_to_DoubleFloatRegister_object($src1$$reg); + FloatRegister Fsrc2 = $primary ? 
reg_to_SingleFloatRegister_object($src2$$reg) + : reg_to_DoubleFloatRegister_object($src2$$reg); + + // Convert condition code fcc0 into -1,0,1; unordered reports less-than (-1) + __ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst); + %} + + enc_class enc_rethrow() %{ + cbuf.set_insts_mark(); + Register temp_reg = G3; + AddressLiteral rethrow_stub(OptoRuntime::rethrow_stub()); + assert(temp_reg != reg_to_register_object(R_I0_num), "temp must not break oop_reg"); + C2_MacroAssembler _masm(&cbuf); +#ifdef ASSERT + __ save_frame(0); + AddressLiteral last_rethrow_addrlit(&last_rethrow); + __ sethi(last_rethrow_addrlit, L1); + Address addr(L1, last_rethrow_addrlit.low10()); + __ rdpc(L2); + __ inc(L2, 3 * BytesPerInstWord); // skip this & 2 more insns to point at jump_to + __ st_ptr(L2, addr); + __ restore(); +#endif + __ JUMP(rethrow_stub, temp_reg, 0); // sethi;jmp + __ delayed()->nop(); + %} + + enc_class emit_mem_nop() %{ + // Generates the instruction LDUXA [o6,g0],#0x82,g0 + cbuf.insts()->emit_int32((unsigned int) 0xc0839040); + %} + + enc_class emit_fadd_nop() %{ + // Generates the instruction FMOVS f31,f31 + cbuf.insts()->emit_int32((unsigned int) 0xbfa0003f); + %} + + enc_class emit_br_nop() %{ + // Generates the instruction BPN,PN . + cbuf.insts()->emit_int32((unsigned int) 0x00400000); + %} + + enc_class enc_membar_acquire %{ + C2_MacroAssembler _masm(&cbuf); + __ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::LoadLoad) ); + %} + + enc_class enc_membar_release %{ + C2_MacroAssembler _masm(&cbuf); + __ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::StoreStore) ); + %} + + enc_class enc_membar_volatile %{ + C2_MacroAssembler _masm(&cbuf); + __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) ); + %} + +%} + +//----------FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. 
+// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add VMRegImpl::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | | | 3 +// | | +--------+ +// V | | old out| Empty on Intel, window on Sparc +// | old |preserve| Must be even aligned. +// | SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned +// | | in | 3 area for Intel ret address +// Owned by |preserve| Empty on Sparc. +// SELF +--------+ +// | | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by +--------+ +// CALLEE | new out| 6 Empty on Intel, window on Sparc +// | new |preserve| Must be even-aligned. +// | SP-+--------+----> Matcher::_new_SP, even aligned +// | | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be necessary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be necessary for +// varargs C calling conventions. 
+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. + +frame %{ + // These two registers define part of the calling convention + // between compiled code and the interpreter. + inline_cache_reg(R_G5); // Inline Cache Register or Method* for I2C + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + + // Number of stack slots consumed by a Monitor enter + sync_stack_slots(2); + + // Compiled code's Frame Pointer + frame_pointer(R_SP); + + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); + // LP64: Alignment size in bytes (128-bit -> 16 bytes) + // !LP64: Alignment size in bytes (64-bit -> 8 bytes) + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + in_preserve_stack_slots(0); + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + // ADLC doesn't support parsing expressions, so I folded the math by hand. + // (callee_register_argument_save_area_words (6) + callee_aggregate_return_pointer_words (0)) * 2-stack-slots-per-word + varargs_C_out_slots_killed(12); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. 
+ return_addr(REG R_I7); // Ret Addr is in register I7 + + // Body of function which returns an OptoRegs array locating + // arguments either in registers or in stack slots for calling + // java + calling_convention %{ + (void) SharedRuntime::java_calling_convention(sig_bt, regs, length, is_outgoing); + + %} + + // Body of function which returns an OptoRegs array locating + // arguments either in registers or in stack slots for calling + // C. + c_calling_convention %{ + // This is obviously always outgoing + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + // Location of native (C/C++) and interpreter return values. This is specified to + // be the same as Java. In the 32-bit VM, long values are actually returned from + // native calls in O0:O1 and returned to the interpreter in I0:I1. The copying + // to and from the register pairs is done by the appropriate call and epilog + // opcodes. This simplifies the register allocator. + c_return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + static int lo_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_O0_num, R_O0_num, R_O0_num, R_F0_num, R_F0_num, R_O0_num }; + static int hi_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_O0H_num, OptoReg::Bad, R_F1_num, R_O0H_num}; + static int lo_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_I0_num, R_I0_num, R_I0_num, R_F0_num, R_F0_num, R_I0_num }; + static int hi_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_I0H_num, OptoReg::Bad, R_F1_num, R_I0H_num}; + return OptoRegPair( (is_outgoing?hi_out:hi_in)[ideal_reg], + (is_outgoing?lo_out:lo_in)[ideal_reg] ); + %} + + // Location of compiled Java return values. 
Same as C + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + static int lo_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_O0_num, R_O0_num, R_O0_num, R_F0_num, R_F0_num, R_O0_num }; + static int hi_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_O0H_num, OptoReg::Bad, R_F1_num, R_O0H_num}; + static int lo_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_I0_num, R_I0_num, R_I0_num, R_F0_num, R_F0_num, R_I0_num }; + static int hi_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_I0H_num, OptoReg::Bad, R_F1_num, R_I0H_num}; + return OptoRegPair( (is_outgoing?hi_out:hi_in)[ideal_reg], + (is_outgoing?lo_out:lo_in)[ideal_reg] ); + %} + +%} + + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(1); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) + +// avoid_back_to_back attribute is an expression that must return +// one of the following values defined in MachNode: +// AVOID_NONE - instruction can be placed anywhere +// AVOID_BEFORE - instruction cannot be placed after an +// instruction with MachNode::AVOID_AFTER +// AVOID_AFTER - the next instruction cannot be the one +// with MachNode::AVOID_BEFORE +// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at +// the same time +ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE); + +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? 
+ +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +//----------Simple Operands---------------------------------------------------- +// Immediate Operands +// Integer Immediate: 32-bit +operand immI() %{ + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 0 +operand immI0() %{ + predicate(n->get_int() == 0); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 5-bit +operand immI5() %{ + predicate(Assembler::is_simm5(n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 8-bit +operand immI8() %{ + predicate(Assembler::is_simm8(n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 10 +operand immI10() %{ + predicate(n->get_int() == 10); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 11-bit +operand immI11() %{ + predicate(Assembler::is_simm11(n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 13-bit +operand immI13() %{ + predicate(Assembler::is_simm13(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 13-bit minus 7 +operand immI13m7() %{ + predicate((-4096 < n->get_int()) && ((n->get_int() + 7) <= 4095)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 16-bit +operand immI16() %{ + predicate(Assembler::is_simm16(n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 
Integer Immediate: the values 1-31 +operand immI_1_31() %{ + predicate(n->get_int() >= 1 && n->get_int() <= 31); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the values 32-63 +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Immediates for special shifts (sign extend) + +// Integer Immediate: the value 16 +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 24 +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} +// Integer Immediate: the value 255 +operand immI_255() %{ + predicate( n->get_int() == 255 ); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 65535 +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the values 0-31 +operand immU5() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 31); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 6-bit +operand immU6() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 63); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: 12-bit (non-negative that fits in simm13) +operand immU12() %{ + predicate((0 <= n->get_int()) && Assembler::is_simm13(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Long Immediate: 12-bit (non-negative that fits in simm13) +operand immUL12() %{ + predicate((0 <= n->get_long()) && (n->get_long() == (int)n->get_long()) && Assembler::is_simm13((int)n->get_long())); + match(ConL); + op_cost(0); + + format %{ 
%} + interface(CONST_INTER); +%} + +// Integer Immediate non-negative +operand immU31() +%{ + predicate(n->get_int() >= 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: the value FF +operand immL_FF() %{ + predicate( n->get_long() == 0xFFL ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: the value FFFF +operand immL_FFFF() %{ + predicate( n->get_long() == 0xFFFFL ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 32 or 64-bit +operand immP() %{ + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 64-bit +operand immP_set() %{ + predicate(!VM_Version::has_fast_ld()); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 64-bit +// From Niagara2 processors on a load should be better than materializing. 
+operand immP_load() %{ + predicate(VM_Version::has_fast_ld() && (n->bottom_type()->isa_oop_ptr() || (MacroAssembler::insts_for_set(n->get_ptr()) > 3))); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 64-bit +operand immP_no_oop_cheap() %{ + predicate(VM_Version::has_fast_ld() && !n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set(n->get_ptr()) <= 3)); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immP13() %{ + predicate((-4096 < n->get_ptr()) && (n->get_ptr() <= 4095)); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immP0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Narrow Pointer Immediate +operand immN() +%{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() +%{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN0() +%{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL() %{ + match(ConL); + op_cost(40); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immL0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: 5-bit +operand immL5() %{ + predicate(n->get_long() == (int)n->get_long() && Assembler::is_simm5((int)n->get_long())); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: 13-bit +operand immL13() %{ + predicate((-4096L < 
n->get_long()) && (n->get_long() <= 4095L)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: 13-bit minus 7 +operand immL13m7() %{ + predicate((-4096L < n->get_long()) && ((n->get_long() + 7L) <= 4095L)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_32bits() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: cheap (materialize in <= 3 instructions) +operand immL_cheap() %{ + predicate(!VM_Version::has_fast_ld() || MacroAssembler::insts_for_set64(n->get_long()) <= 3); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: expensive (materialize in > 3 instructions) +operand immL_expensive() %{ + predicate(VM_Version::has_fast_ld() && MacroAssembler::insts_for_set64(n->get_long()) > 3); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate +operand immD() %{ + match(ConD); + + op_cost(40); + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate: +0.0d +operand immD0() %{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate: +0.0f +operand immF0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Register Operands +// Integer Register +operand iRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + match(notemp_iRegI); + match(g1RegI); + match(o0RegI); + match(iRegIsafe); + + format %{ %} + interface(REG_INTER); +%} + +operand notemp_iRegI() %{ + constraint(ALLOC_IN_RC(notemp_int_reg)); + match(RegI); + + match(o0RegI); + + format %{ %} + 
interface(REG_INTER); +%} + +operand o0RegI() %{ + constraint(ALLOC_IN_RC(o0_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand iRegP() %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(RegP); + + match(lock_ptr_RegP); + match(g1RegP); + match(g2RegP); + match(g3RegP); + match(g4RegP); + match(i0RegP); + match(o0RegP); + match(o1RegP); + match(l7RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand sp_ptr_RegP() %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(RegP); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand lock_ptr_RegP() %{ + constraint(ALLOC_IN_RC(lock_ptr_reg)); + match(RegP); + match(i0RegP); + match(o0RegP); + match(o1RegP); + match(l7RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand g1RegP() %{ + constraint(ALLOC_IN_RC(g1_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand g2RegP() %{ + constraint(ALLOC_IN_RC(g2_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand g3RegP() %{ + constraint(ALLOC_IN_RC(g3_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand g1RegI() %{ + constraint(ALLOC_IN_RC(g1_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand g3RegI() %{ + constraint(ALLOC_IN_RC(g3_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand g4RegI() %{ + constraint(ALLOC_IN_RC(g4_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand g4RegP() %{ + constraint(ALLOC_IN_RC(g4_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand i0RegP() %{ + constraint(ALLOC_IN_RC(i0_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand o0RegP() %{ + constraint(ALLOC_IN_RC(o0_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand o1RegP() %{ + constraint(ALLOC_IN_RC(o1_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand 
o2RegP() %{ + constraint(ALLOC_IN_RC(o2_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand o7RegP() %{ + constraint(ALLOC_IN_RC(o7_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand l7RegP() %{ + constraint(ALLOC_IN_RC(l7_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand o7RegI() %{ + constraint(ALLOC_IN_RC(o7_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand iRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +// Long Register +operand iRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand o2RegL() %{ + constraint(ALLOC_IN_RC(o2_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand o7RegL() %{ + constraint(ALLOC_IN_RC(o7_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand g1RegL() %{ + constraint(ALLOC_IN_RC(g1_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand g3RegL() %{ + constraint(ALLOC_IN_RC(g3_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Int Register safe +// This is 64bit safe +operand iRegIsafe() %{ + constraint(ALLOC_IN_RC(long_reg)); + + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +// Condition Code Flag Register +operand flagsReg() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "ccr" %} // both ICC and XCC + interface(REG_INTER); +%} + +// Condition Code Register, unsigned comparisons. +operand flagsRegU() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "icc_U" %} + interface(REG_INTER); +%} + +// Condition Code Register, pointer comparisons. +operand flagsRegP() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "xcc_P" %} + interface(REG_INTER); +%} + +// Condition Code Register, long comparisons. 
+operand flagsRegL() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "xcc_L" %} + interface(REG_INTER); +%} + +// Condition Code Register, unsigned long comparisons. +operand flagsRegUL() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "xcc_UL" %} + interface(REG_INTER); +%} + +// Condition Code Register, floating comparisons, unordered same as "less". +operand flagsRegF() %{ + constraint(ALLOC_IN_RC(float_flags)); + match(RegFlags); + match(flagsRegF0); + + format %{ %} + interface(REG_INTER); +%} + +operand flagsRegF0() %{ + constraint(ALLOC_IN_RC(float_flag0)); + match(RegFlags); + + format %{ %} + interface(REG_INTER); +%} + + +// Condition Code Flag Register used by long compare +operand flagsReg_long_LTGE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + format %{ "icc_LTGE" %} + interface(REG_INTER); +%} +operand flagsReg_long_EQNE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + format %{ "icc_EQNE" %} + interface(REG_INTER); +%} +operand flagsReg_long_LEGT() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + format %{ "icc_LEGT" %} + interface(REG_INTER); +%} + + +operand regD() %{ + constraint(ALLOC_IN_RC(dflt_reg)); + match(RegD); + + match(regD_low); + + format %{ %} + interface(REG_INTER); +%} + +operand regF() %{ + constraint(ALLOC_IN_RC(sflt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +operand regD_low() %{ + constraint(ALLOC_IN_RC(dflt_low_reg)); + match(regD); + + format %{ %} + interface(REG_INTER); +%} + +// Special Registers + +// Method Register +operand inline_cache_regP(iRegP reg) %{ + constraint(ALLOC_IN_RC(g5_regP)); // G5=inline_cache_reg but uses 2 bits instead of 1 + match(reg); + format %{ %} + interface(REG_INTER); +%} + +operand interpreter_method_oop_regP(iRegP reg) %{ + constraint(ALLOC_IN_RC(g5_regP)); // G5=interpreter_method_oop_reg but uses 2 bits instead of 1 + match(reg); + format %{ %} + interface(REG_INTER); 
+%} + + +//----------Complex Operands--------------------------------------------------- +// Indirect Memory Reference +operand indirect(sp_ptr_RegP reg) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(reg); + + op_cost(100); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect with simm13 Offset +operand indOffset13(sp_ptr_RegP reg, immX13 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($offset); + %} +%} + +// Indirect with simm13 Offset minus 7 +operand indOffset13m7(sp_ptr_RegP reg, immX13m7 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($offset); + %} +%} + +// Note: Intel has a swapped version also, like this: +//operand indOffsetX(iRegI reg, immP offset) %{ +// constraint(ALLOC_IN_RC(int_reg)); +// match(AddP offset reg); +// +// op_cost(100); +// format %{ "[$reg + $offset]" %} +// interface(MEMORY_INTER) %{ +// base($reg); +// index(0x0); +// scale(0x0); +// disp($offset); +// %} +//%} +//// However, it doesn't make sense for SPARC, since +// we have no particularly good way to embed oops in +// single instructions. + +// Indirect with Register Index +operand indIndex(iRegP addr, iRegX index) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr index); + + op_cost(100); + format %{ "[$addr + $index]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. 
+operand stackSlotI(sRegI reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + op_cost(100); + //match(RegI); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0xE); // R_SP + index(0x0); + scale(0x0); + disp($reg); // Stack Offset + %} +%} + +operand stackSlotP(sRegP reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + op_cost(100); + //match(RegP); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0xE); // R_SP + index(0x0); + scale(0x0); + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + op_cost(100); + //match(RegF); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0xE); // R_SP + index(0x0); + scale(0x0); + disp($reg); // Stack Offset + %} +%} +operand stackSlotD(sRegD reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + op_cost(100); + //match(RegD); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0xE); // R_SP + index(0x0); + scale(0x0); + disp($reg); // Stack Offset + %} +%} +operand stackSlotL(sRegL reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + op_cost(100); + //match(RegL); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0xE); // R_SP + index(0x0); + scale(0x0); + disp($reg); // Stack Offset + %} +%} + +// Operands for expressing Control Flow +// NOTE: Label is a predefined operand which should not be redefined in +// the AD file. It is generically handled within the ADLC. + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. 
+// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x1); + not_equal(0x9); + less(0x3); + greater_equal(0xB); + less_equal(0x2); + greater(0xA); + overflow(0x7); + no_overflow(0xF); + %} +%} + +// Comparison Op, unsigned +operand cmpOpU() %{ + match(Bool); + predicate(n->as_Bool()->_test._test != BoolTest::overflow && + n->as_Bool()->_test._test != BoolTest::no_overflow); + + format %{ "u" %} + interface(COND_INTER) %{ + equal(0x1); + not_equal(0x9); + less(0x5); + greater_equal(0xD); + less_equal(0x4); + greater(0xC); + overflow(0x7); + no_overflow(0xF); + %} +%} + +// Comparison Op, pointer (same as unsigned) +operand cmpOpP() %{ + match(Bool); + predicate(n->as_Bool()->_test._test != BoolTest::overflow && + n->as_Bool()->_test._test != BoolTest::no_overflow); + + format %{ "p" %} + interface(COND_INTER) %{ + equal(0x1); + not_equal(0x9); + less(0x5); + greater_equal(0xD); + less_equal(0x4); + greater(0xC); + overflow(0x7); + no_overflow(0xF); + %} +%} + +// Comparison Op, branch-register encoding +operand cmpOp_reg() %{ + match(Bool); + predicate(n->as_Bool()->_test._test != BoolTest::overflow && + n->as_Bool()->_test._test != BoolTest::no_overflow); + + format %{ "" %} + interface(COND_INTER) %{ + equal (0x1); + not_equal (0x5); + less (0x3); + greater_equal(0x7); + less_equal (0x2); + greater (0x6); + overflow(0x7); // not supported + no_overflow(0xF); // not supported + %} +%} + +// Comparison Code, floating, unordered same as less +operand cmpOpF() %{ + match(Bool); + predicate(n->as_Bool()->_test._test != BoolTest::overflow && + n->as_Bool()->_test._test != BoolTest::no_overflow); + + format %{ "fl" %} + interface(COND_INTER) %{ + equal(0x9); + not_equal(0x1); + less(0x3); + 
greater_equal(0xB); + less_equal(0xE); + greater(0x6); + + overflow(0x7); // not supported + no_overflow(0xF); // not supported + %} +%} + +// Used by long compare +operand cmpOp_commute() %{ + match(Bool); + predicate(n->as_Bool()->_test._test != BoolTest::overflow && + n->as_Bool()->_test._test != BoolTest::no_overflow); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x1); + not_equal(0x9); + less(0xA); + greater_equal(0x2); + less_equal(0xB); + greater(0x3); + overflow(0x7); + no_overflow(0xF); + %} +%} + +//----------OPERAND CLASSES---------------------------------------------------- +// Operand Classes are groups of operands that are used to simplify +// instruction definitions by not requiring the AD writer to specify separate +// instructions for every form of operand when the instruction accepts +// multiple operand types with the same basic encoding and format. The classic +// case of this is memory operands. +opclass memory( indirect, indOffset13, indIndex ); +opclass indIndexMemory( indIndex ); + +//----------PIPELINE----------------------------------------------------------- +pipeline %{ + +//----------ATTRIBUTES--------------------------------------------------------- +attributes %{ + fixed_size_instructions; // Fixed size instructions + branch_has_delay_slot; // Branch has delay slot following + max_instructions_per_bundle = 4; // Up to 4 instructions per bundle + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( Nop_A0, Nop_A1, Nop_MS, Nop_FA, Nop_BR ); +%} + +//----------RESOURCES---------------------------------------------------------- +// Resources are the functional units available to the machine +resources(A0, A1, MS, BR, FA, FM, IDIV, FDIV, IALU = A0 | A1); + +//----------PIPELINE DESCRIPTION----------------------------------------------- +// Pipeline Description 
specifies the stages in the machine's pipeline + +pipe_desc(A, P, F, B, I, J, S, R, E, C, M, W, X, T, D); + +//----------PIPELINE CLASSES--------------------------------------------------- +// Pipeline Classes describe the stages in which input and output are +// referenced by the hardware pipeline. + +// Integer ALU reg-reg operation +pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg long operation +pipe_class ialu_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{ + instruction_count(2); + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; + IALU : R; +%} + +// Integer ALU reg-reg long dependent operation +pipe_class ialu_reg_reg_2_dep(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : E(write); + src1 : R(read); + src2 : R(read); + cr : E(write); + IALU : R(2); +%} + +// Integer ALU reg-imm operation +pipe_class ialu_reg_imm(iRegI dst, iRegI src1, immI13 src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code +pipe_class ialu_cc_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{ + single_instruction; + dst : E(write); + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm operation with condition code +pipe_class ialu_cc_reg_imm(iRegI dst, iRegI src1, immI13 src2, flagsReg cr) %{ + single_instruction; + dst : E(write); + cr : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU zero-reg operation +pipe_class ialu_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{ + single_instruction; + dst : E(write); + src2 : R(read); + IALU : R; +%} + +// Integer ALU zero-reg operation with condition code only +pipe_class ialu_cconly_zero_reg(flagsReg cr, iRegI src) %{ + single_instruction; + cr : E(write); + src : R(read); + IALU : R; +%} + +// Integer 
ALU reg-reg operation with condition code only +pipe_class ialu_cconly_reg_reg(flagsReg cr, iRegI src1, iRegI src2) %{ + single_instruction; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm operation with condition code only +pipe_class ialu_cconly_reg_imm(flagsReg cr, iRegI src1, immI13 src2) %{ + single_instruction; + cr : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg-zero operation with condition code only +pipe_class ialu_cconly_reg_reg_zero(flagsReg cr, iRegI src1, iRegI src2, immI0 zero) %{ + single_instruction; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm-zero operation with condition code only +pipe_class ialu_cconly_reg_imm_zero(flagsReg cr, iRegI src1, immI13 src2, immI0 zero) %{ + single_instruction; + cr : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code, src1 modified +pipe_class ialu_cc_rwreg_reg(flagsReg cr, iRegI src1, iRegI src2) %{ + single_instruction; + cr : E(write); + src1 : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm operation with condition code, src1 modified +pipe_class ialu_cc_rwreg_imm(flagsReg cr, iRegI src1, immI13 src2) %{ + single_instruction; + cr : E(write); + src1 : E(write); + src1 : R(read); + IALU : R; +%} + +pipe_class cmpL_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr ) %{ + multiple_bundles; + dst : E(write)+4; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R(3); + BR : R(2); +%} + +// Integer ALU operation +pipe_class ialu_none(iRegI dst) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +// Integer ALU reg operation +pipe_class ialu_reg(iRegI dst, iRegI src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} + +// Integer ALU reg conditional operation +// This instruction has a 1 cycle stall, and cannot execute +// in the same cycle as the 
instruction setting the condition +// code. We kludge this by pretending to read the condition code +// 1 cycle earlier, and by marking the functional units as busy +// for 2 cycles with the result available 1 cycle later than +// is really the case. +pipe_class ialu_reg_flags( iRegI op2_out, iRegI op2_in, iRegI op1, flagsReg cr ) %{ + single_instruction; + op2_out : C(write); + op1 : R(read); + cr : R(read); // This is really E, with a 1 cycle stall + BR : R(2); + MS : R(2); +%} + +pipe_class ialu_clr_and_mover( iRegI dst, iRegP src ) %{ + instruction_count(1); multiple_bundles; + dst : C(write)+1; + src : R(read)+1; + IALU : R(1); + BR : E(2); + MS : E(2); +%} + +// Integer ALU reg operation +pipe_class ialu_move_reg_L_to_I(iRegI dst, iRegL src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} +pipe_class ialu_move_reg_I_to_L(iRegL dst, iRegI src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} + +// Two integer ALU reg operations +pipe_class ialu_reg_2(iRegL dst, iRegL src) %{ + instruction_count(2); + dst : E(write); + src : R(read); + A0 : R; + A1 : R; +%} + +// Two integer ALU reg operations +pipe_class ialu_move_reg_L_to_L(iRegL dst, iRegL src) %{ + instruction_count(2); may_have_no_code; + dst : E(write); + src : R(read); + A0 : R; + A1 : R; +%} + +// Integer ALU imm operation +pipe_class ialu_imm(iRegI dst, immI13 src) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +// Integer ALU reg-reg with carry operation +pipe_class ialu_reg_reg_cy(iRegI dst, iRegI src1, iRegI src2, iRegI cy) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU cc operation +pipe_class ialu_cc(iRegI dst, flagsReg cc) %{ + single_instruction; + dst : E(write); + cc : R(read); + IALU : R; +%} + +// Integer ALU cc / second IALU operation +pipe_class ialu_reg_ialu( iRegI dst, iRegI src ) %{ + instruction_count(1); 
multiple_bundles; + dst : E(write)+1; + src : R(read); + IALU : R; +%} + +// Integer ALU cc / second IALU operation +pipe_class ialu_reg_reg_ialu( iRegI dst, iRegI p, iRegI q ) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + p : R(read); + q : R(read); + IALU : R; +%} + +// Integer ALU hi-lo-reg operation +pipe_class ialu_hi_lo_reg(iRegI dst, immI src) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + IALU : R(2); +%} + +// Float ALU hi-lo-reg operation (with temp) +pipe_class ialu_hi_lo_reg_temp(regF dst, immF src, g3RegP tmp) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + IALU : R(2); +%} + +// Long Constant +pipe_class loadConL( iRegL dst, immL src ) %{ + instruction_count(2); multiple_bundles; + dst : E(write)+1; + IALU : R(2); + IALU : R(2); +%} + +// Pointer Constant +pipe_class loadConP( iRegP dst, immP src ) %{ + instruction_count(0); multiple_bundles; + fixed_latency(6); +%} + +// Long Constant small +pipe_class loadConLlo( iRegL dst, immL src ) %{ + instruction_count(2); + dst : E(write); + IALU : R; + IALU : R; +%} + +// [PHH] This is wrong for 64-bit. See LdImmF/D. 
+pipe_class loadConFD(regF dst, immF src, g3RegP tmp) %{ + instruction_count(1); multiple_bundles; + src : R(read); + dst : M(write)+1; + IALU : R; + MS : E; +%} + +// Integer ALU nop operation +pipe_class ialu_nop() %{ + single_instruction; + IALU : R; +%} + +// Integer ALU nop operation (A0 resource) +pipe_class ialu_nop_A0() %{ + single_instruction; + A0 : R; +%} + +// Integer ALU nop operation (A1 resource) +pipe_class ialu_nop_A1() %{ + single_instruction; + A1 : R; +%} + +// Integer Multiply reg-reg operation +pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + MS : R(5); +%} + +// Integer Multiply reg-imm operation +pipe_class imul_reg_imm(iRegI dst, iRegI src1, immI13 src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + MS : R(5); +%} + +pipe_class mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + single_instruction; + dst : E(write)+4; + src1 : R(read); + src2 : R(read); + MS : R(6); +%} + +pipe_class mulL_reg_imm(iRegL dst, iRegL src1, immL13 src2) %{ + single_instruction; + dst : E(write)+4; + src1 : R(read); + MS : R(6); +%} + +// Integer Divide reg-reg +pipe_class sdiv_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : E(write); + temp : E(write); + src1 : R(read); + src2 : R(read); + temp : R(read); + MS : R(38); +%} + +// Integer Divide reg-imm +pipe_class sdiv_reg_imm(iRegI dst, iRegI src1, immI13 src2, iRegI temp, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : E(write); + temp : E(write); + src1 : R(read); + temp : R(read); + MS : R(38); +%} + +// Long Divide +pipe_class divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + dst : E(write)+71; + src1 : R(read); + src2 : R(read)+1; + MS : R(70); +%} + +pipe_class divL_reg_imm(iRegL dst, iRegL src1, immL13 src2) %{ + dst : E(write)+71; + src1 : R(read); + MS : R(70); +%} + +// Floating Point Add Float +pipe_class faddF_reg_reg(regF dst, 
regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Add Double +pipe_class faddD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Conditional Move based on integer flags (float) +pipe_class int_conditional_float_move (cmpOp cmp, flagsReg cr, regF dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + cr : R(read); + FA : R(2); + BR : R(2); +%} + +// Floating Point Conditional Move based on integer flags (double) +pipe_class int_conditional_double_move (cmpOp cmp, flagsReg cr, regD dst, regD src) %{ + single_instruction; + dst : X(write); + src : E(read); + cr : R(read); + FA : R(2); + BR : R(2); +%} + +// Floating Point Multiply Float +pipe_class fmulF_reg_reg(regF dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; +%} + +// Floating Point Multiply Double +pipe_class fmulD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; +%} + +// Floating Point Divide Float +pipe_class fdivF_reg_reg(regF dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; + FDIV : C(14); +%} + +// Floating Point Divide Double +pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; + FDIV : C(17); +%} + +// Fused floating-point multiply-add float. +pipe_class fmaF_regx4(regF dst, regF src1, regF src2, regF src3) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + src3 : E(read); + FM : R; +%} + +// Fused floating-point multiply-add double. 
+pipe_class fmaD_regx4(regD dst, regD src1, regD src2, regD src3) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + src3 : E(read); + FM : R; +%} + +// Floating Point Move/Negate/Abs Float +pipe_class faddF_reg(regF dst, regF src) %{ + single_instruction; + dst : W(write); + src : E(read); + FA : R(1); +%} + +// Floating Point Move/Negate/Abs Double +pipe_class faddD_reg(regD dst, regD src) %{ + single_instruction; + dst : W(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert F->D +pipe_class fcvtF2D(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert I->D +pipe_class fcvtI2D(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert LHi->D +pipe_class fcvtLHi2D(regD dst, regD src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert L->D +pipe_class fcvtL2D(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert L->F +pipe_class fcvtL2F(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert D->F +pipe_class fcvtD2F(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert I->L +pipe_class fcvtI2L(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert D->I +pipe_class fcvtD2I(regF dst, regD src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert D->L +pipe_class fcvtD2L(regD dst, regD src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert F->I +pipe_class fcvtF2I(regF dst, regF src, flagsReg cr) %{ + 
instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert F->L +pipe_class fcvtF2L(regD dst, regF src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert I->F +pipe_class fcvtI2F(regF dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Compare Float +pipe_class faddF_fcc_reg_reg_zero(flagsRegF cr, regF src1, regF src2, immI0 zero) %{ + single_instruction; + cr : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Compare Double +pipe_class faddD_fcc_reg_reg_zero(flagsRegF cr, regD src1, regD src2, immI0 zero) %{ + single_instruction; + cr : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Add Nop +pipe_class fadd_nop() %{ + single_instruction; + FA : R; +%} + +// Integer Store to Memory +pipe_class istore_mem_reg(memory mem, iRegI src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Integer Store to Memory +pipe_class istore_mem_spORreg(memory mem, sp_ptr_RegP src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Integer Store Zero to Memory +pipe_class istore_mem_zero(memory mem, immI0 src) %{ + single_instruction; + mem : R(read); + MS : R; +%} + +// Special Stack Slot Store +pipe_class istore_stk_reg(stackSlotI stkSlot, iRegI src) %{ + single_instruction; + stkSlot : R(read); + src : C(read); + MS : R; +%} + +// Special Stack Slot Store +pipe_class lstoreI_stk_reg(stackSlotL stkSlot, iRegI src) %{ + instruction_count(2); multiple_bundles; + stkSlot : R(read); + src : C(read); + MS : R(2); +%} + +// Float Store +pipe_class fstoreF_mem_reg(memory mem, RegF src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Float Store +pipe_class fstoreF_mem_zero(memory mem, immF0 src) %{ + single_instruction; + mem : R(read); + MS : R; +%} + 
+// Double Store +pipe_class fstoreD_mem_reg(memory mem, RegD src) %{ + instruction_count(1); + mem : R(read); + src : C(read); + MS : R; +%} + +// Double Store +pipe_class fstoreD_mem_zero(memory mem, immD0 src) %{ + single_instruction; + mem : R(read); + MS : R; +%} + +// Special Stack Slot Float Store +pipe_class fstoreF_stk_reg(stackSlotI stkSlot, RegF src) %{ + single_instruction; + stkSlot : R(read); + src : C(read); + MS : R; +%} + +// Special Stack Slot Double Store +pipe_class fstoreD_stk_reg(stackSlotI stkSlot, RegD src) %{ + single_instruction; + stkSlot : R(read); + src : C(read); + MS : R; +%} + +// Integer Load (when sign bit propagation not needed) +pipe_class iload_mem(iRegI dst, memory mem) %{ + single_instruction; + mem : R(read); + dst : C(write); + MS : R; +%} + +// Integer Load from stack operand +pipe_class iload_stkD(iRegI dst, stackSlotD mem ) %{ + single_instruction; + mem : R(read); + dst : C(write); + MS : R; +%} + +// Integer Load (when sign bit propagation or masking is needed) +pipe_class iload_mask_mem(iRegI dst, memory mem) %{ + single_instruction; + mem : R(read); + dst : M(write); + MS : R; +%} + +// Float Load +pipe_class floadF_mem(regF dst, memory mem) %{ + single_instruction; + mem : R(read); + dst : M(write); + MS : R; +%} + +// Double Load +pipe_class floadD_mem(regD dst, memory mem) %{ + instruction_count(1); multiple_bundles; // Again, unaligned argument is only multiple case + mem : R(read); + dst : M(write); + MS : R; +%} + +// Float Load +pipe_class floadF_stk(regF dst, stackSlotI stkSlot) %{ + single_instruction; + stkSlot : R(read); + dst : M(write); + MS : R; +%} + +// Double Load +pipe_class floadD_stk(regD dst, stackSlotI stkSlot) %{ + single_instruction; + stkSlot : R(read); + dst : M(write); + MS : R; +%} + +// Memory Nop +pipe_class mem_nop() %{ + single_instruction; + MS : R; +%} + +pipe_class sethi(iRegP dst, immI src) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +pipe_class loadPollP(iRegP poll) 
%{ + single_instruction; + poll : R(read); + MS : R; +%} + +pipe_class br(Universe br, label labl) %{ + single_instruction_with_delay_slot; + BR : R; +%} + +pipe_class br_cc(Universe br, cmpOp cmp, flagsReg cr, label labl) %{ + single_instruction_with_delay_slot; + cr : E(read); + BR : R; +%} + +pipe_class br_reg(Universe br, cmpOp cmp, iRegI op1, label labl) %{ + single_instruction_with_delay_slot; + op1 : E(read); + BR : R; + MS : R; +%} + +// Compare and branch +pipe_class cmp_br_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl, flagsReg cr) %{ + instruction_count(2); has_delay_slot; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; + BR : R; +%} + +// Compare and branch +pipe_class cmp_br_reg_imm(Universe br, cmpOp cmp, iRegI src1, immI13 src2, label labl, flagsReg cr) %{ + instruction_count(2); has_delay_slot; + cr : E(write); + src1 : R(read); + IALU : R; + BR : R; +%} + +// Compare and branch using cbcond +pipe_class cbcond_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl) %{ + single_instruction; + src1 : E(read); + src2 : E(read); + IALU : R; + BR : R; +%} + +// Compare and branch using cbcond +pipe_class cbcond_reg_imm(Universe br, cmpOp cmp, iRegI src1, immI5 src2, label labl) %{ + single_instruction; + src1 : E(read); + IALU : R; + BR : R; +%} + +pipe_class br_fcc(Universe br, cmpOpF cc, flagsReg cr, label labl) %{ + single_instruction_with_delay_slot; + cr : E(read); + BR : R; +%} + +pipe_class br_nop() %{ + single_instruction; + BR : R; +%} + +pipe_class simple_call(method meth) %{ + instruction_count(2); multiple_bundles; force_serialization; + fixed_latency(100); + BR : R(1); + MS : R(1); + A0 : R(1); +%} + +pipe_class compiled_call(method meth) %{ + instruction_count(1); multiple_bundles; force_serialization; + fixed_latency(100); + MS : R(1); +%} + +pipe_class call(method meth) %{ + instruction_count(0); multiple_bundles; force_serialization; + fixed_latency(100); +%} + +pipe_class 
tail_call(Universe ignore, label labl) %{ + single_instruction; has_delay_slot; + fixed_latency(100); + BR : R(1); + MS : R(1); +%} + +pipe_class ret(Universe ignore) %{ + single_instruction; has_delay_slot; + BR : R(1); + MS : R(1); +%} + +pipe_class ret_poll(g3RegP poll) %{ + instruction_count(3); has_delay_slot; + poll : E(read); + MS : R; +%} + +// The real do-nothing guy +pipe_class empty( ) %{ + instruction_count(0); +%} + +pipe_class long_memory_op() %{ + instruction_count(0); multiple_bundles; force_serialization; + fixed_latency(25); + MS : R(1); +%} + +// Check-cast +pipe_class partial_subtype_check_pipe(Universe ignore, iRegP array, iRegP match ) %{ + array : R(read); + match : R(read); + IALU : R(2); + BR : R(2); + MS : R; +%} + +// Convert FPU flags into +1,0,-1 +pipe_class floating_cmp( iRegI dst, regF src1, regF src2 ) %{ + src1 : E(read); + src2 : E(read); + dst : E(write); + FA : R; + MS : R(2); + BR : R(2); +%} + +// Compare for p < q, and conditionally add y +pipe_class cadd_cmpltmask( iRegI p, iRegI q, iRegI y ) %{ + p : E(read); + q : E(read); + y : E(read); + IALU : R(3) +%} + +// Perform a compare, then move conditionally in a branch delay slot. +pipe_class min_max( iRegI src2, iRegI srcdst ) %{ + src2 : E(read); + srcdst : E(read); + IALU : R; + BR : R; +%} + +// Define the class for the Nop node +define %{ + MachNop = ialu_nop; +%} + +%} + +//----------INSTRUCTIONS------------------------------------------------------- + +//------------Special Stack Slot instructions - no match rules----------------- +instruct stkI_to_regF(regF dst, stackSlotI src) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); + format %{ "LDF $src,$dst\t! stkI to regF" %} + opcode(Assembler::ldf_op3); + ins_encode(simple_form3_mem_reg(src, dst)); + ins_pipe(floadF_stk); +%} + +instruct stkL_to_regD(regD dst, stackSlotL src) %{ + // No match rule to avoid chain rule match. 
+ effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); + format %{ "LDDF $src,$dst\t! stkL to regD" %} + opcode(Assembler::lddf_op3); + ins_encode(simple_form3_mem_reg(src, dst)); + ins_pipe(floadD_stk); +%} + +instruct regF_to_stkI(stackSlotI dst, regF src) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); + format %{ "STF $src,$dst\t! regF to stkI" %} + opcode(Assembler::stf_op3); + ins_encode(simple_form3_mem_reg(dst, src)); + ins_pipe(fstoreF_stk_reg); +%} + +instruct regD_to_stkL(stackSlotL dst, regD src) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); + format %{ "STDF $src,$dst\t! regD to stkL" %} + opcode(Assembler::stdf_op3); + ins_encode(simple_form3_mem_reg(dst, src)); + ins_pipe(fstoreD_stk_reg); +%} + +instruct regI_to_stkLHi(stackSlotL dst, iRegI src) %{ + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST*2); + format %{ "STW $src,$dst.hi\t! long\n\t" + "STW R_G0,$dst.lo" %} + opcode(Assembler::stw_op3); + ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, R_G0)); + ins_pipe(lstoreI_stk_reg); +%} + +instruct regL_to_stkD(stackSlotD dst, iRegL src) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); + format %{ "STX $src,$dst\t! 
regL to stkD" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( dst, src ) ); + ins_pipe(istore_stk_reg); +%} + +//---------- Chain stack slots between similar types -------- + +// Load integer from stack slot +instruct stkI_to_regI( iRegI dst, stackSlotI src ) %{ + match(Set dst src); + ins_cost(MEMORY_REF_COST); + + format %{ "LDUW $src,$dst\t!stk" %} + opcode(Assembler::lduw_op3); + ins_encode(simple_form3_mem_reg( src, dst ) ); + ins_pipe(iload_mem); +%} + +// Store integer to stack slot +instruct regI_to_stkI( stackSlotI dst, iRegI src ) %{ + match(Set dst src); + ins_cost(MEMORY_REF_COST); + + format %{ "STW $src,$dst\t!stk" %} + opcode(Assembler::stw_op3); + ins_encode(simple_form3_mem_reg( dst, src ) ); + ins_pipe(istore_mem_reg); +%} + +// Load long from stack slot +instruct stkL_to_regL( iRegL dst, stackSlotL src ) %{ + match(Set dst src); + + ins_cost(MEMORY_REF_COST); + format %{ "LDX $src,$dst\t! long" %} + opcode(Assembler::ldx_op3); + ins_encode(simple_form3_mem_reg( src, dst ) ); + ins_pipe(iload_mem); +%} + +// Store long to stack slot +instruct regL_to_stkL(stackSlotL dst, iRegL src) %{ + match(Set dst src); + + ins_cost(MEMORY_REF_COST); + format %{ "STX $src,$dst\t! 
long" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( dst, src ) ); + ins_pipe(istore_mem_reg); +%} + +// Load pointer from stack slot, 64-bit encoding +instruct stkP_to_regP( iRegP dst, stackSlotP src ) %{ + match(Set dst src); + ins_cost(MEMORY_REF_COST); + format %{ "LDX $src,$dst\t!ptr" %} + opcode(Assembler::ldx_op3); + ins_encode(simple_form3_mem_reg( src, dst ) ); + ins_pipe(iload_mem); +%} + +// Store pointer to stack slot +instruct regP_to_stkP(stackSlotP dst, iRegP src) %{ + match(Set dst src); + ins_cost(MEMORY_REF_COST); + format %{ "STX $src,$dst\t!ptr" %} + opcode(Assembler::stx_op3); + ins_encode(simple_form3_mem_reg( dst, src ) ); + ins_pipe(istore_mem_reg); +%} + +//------------Special Nop instructions for bundling - no match rules----------- +// Nop using the A0 functional unit +instruct Nop_A0() %{ + ins_cost(0); + + format %{ "NOP ! Alu Pipeline" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form2_nop() ); + ins_pipe(ialu_nop_A0); +%} + +// Nop using the A1 functional unit +instruct Nop_A1( ) %{ + ins_cost(0); + + format %{ "NOP ! Alu Pipeline" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form2_nop() ); + ins_pipe(ialu_nop_A1); +%} + +// Nop using the memory functional unit +instruct Nop_MS( ) %{ + ins_cost(0); + + format %{ "NOP ! Memory Pipeline" %} + ins_encode( emit_mem_nop ); + ins_pipe(mem_nop); +%} + +// Nop using the floating add functional unit +instruct Nop_FA( ) %{ + ins_cost(0); + + format %{ "NOP ! Floating Add Pipeline" %} + ins_encode( emit_fadd_nop ); + ins_pipe(fadd_nop); +%} + +// Nop using the branch functional unit +instruct Nop_BR( ) %{ + ins_cost(0); + + format %{ "NOP ! 
Branch Pipeline" %} + ins_encode( emit_br_nop ); + ins_pipe(br_nop); +%} + +//----------Load/Store/Move Instructions--------------------------------------- +//----------Load Instructions-------------------------------------------------- +// Load Byte (8bit signed) +instruct loadB(iRegI dst, memory mem) %{ + match(Set dst (LoadB mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSB $mem,$dst\t! byte" %} + ins_encode %{ + __ ldsb($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Byte (8bit signed) into a Long Register +instruct loadB2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSB $mem,$dst\t! byte -> long" %} + ins_encode %{ + __ ldsb($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) into an int reg +instruct loadUB(iRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUB $mem,$dst\t! ubyte" %} + ins_encode %{ + __ ldub($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) into a Long Register +instruct loadUB2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUB $mem,$dst\t! 
ubyte -> long" %} + ins_encode %{ + __ ldub($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register +instruct loadUB2L_immI(iRegL dst, memory mem, immI mask) %{ + match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + + size(2*4); + format %{ "LDUB $mem,$dst\t# ubyte & 32-bit mask -> long\n\t" + "AND $dst,right_n_bits($mask, 8),$dst" %} + ins_encode %{ + __ ldub($mem$$Address, $dst$$Register); + __ and3($dst$$Register, $mask$$constant & right_n_bits(8), $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Short (16bit signed) +instruct loadS(iRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSH $mem,$dst\t! short" %} + ins_encode %{ + __ ldsh($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(iRegI dst, indOffset13m7 mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDSB $mem+1,$dst\t! short -> byte" %} + ins_encode %{ + __ ldsb($mem$$Address, $dst$$Register, 1); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Short (16bit signed) into a Long Register +instruct loadS2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSH $mem,$dst\t! short -> long" %} + ins_encode %{ + __ ldsh($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) +instruct loadUS(iRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUH $mem,$dst\t! 
ushort/char" %} + ins_encode %{ + __ lduh($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) +instruct loadUS2B(iRegI dst, indOffset13m7 mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSB $mem+1,$dst\t! ushort -> byte" %} + ins_encode %{ + __ ldsb($mem$$Address, $dst$$Register, 1); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) into a Long Register +instruct loadUS2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUH $mem,$dst\t! ushort/char -> long" %} + ins_encode %{ + __ lduh($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register +instruct loadUS2L_immI_255(iRegL dst, indOffset13m7 mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUB $mem+1,$dst\t! ushort/char & 0xFF -> long" %} + ins_encode %{ + __ ldub($mem$$Address, $dst$$Register, 1); // LSB is index+1 on BE + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) with a 13-bit mask into a Long Register +instruct loadUS2L_immI13(iRegL dst, memory mem, immI13 mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + + size(2*4); + format %{ "LDUH $mem,$dst\t! 
ushort/char & 13-bit mask -> long\n\t" + "AND $dst,$mask,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + __ lduh($mem$$Address, Rdst); + __ and3(Rdst, $mask$$constant, Rdst); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) with a 32-bit mask into a Long Register +instruct loadUS2L_immI(iRegL dst, memory mem, immI mask, iRegL tmp) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + effect(TEMP dst, TEMP tmp); + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + + format %{ "LDUH $mem,$dst\t! ushort/char & 32-bit mask -> long\n\t" + "SET right_n_bits($mask, 16),$tmp\n\t" + "AND $dst,$tmp,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + __ lduh($mem$$Address, Rdst); + __ set($mask$$constant & right_n_bits(16), Rtmp); + __ and3(Rdst, Rtmp, Rdst); + %} + ins_pipe(iload_mem); +%} + +// Load Integer +instruct loadI(iRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUW $mem,$dst\t! int" %} + ins_encode %{ + __ lduw($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Integer to Byte (8 bit signed) +instruct loadI2B(iRegI dst, indOffset13m7 mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDSB $mem+3,$dst\t! int -> byte" %} + ins_encode %{ + __ ldsb($mem$$Address, $dst$$Register, 3); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(iRegI dst, indOffset13m7 mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDUB $mem+3,$dst\t! 
int -> ubyte" %} + ins_encode %{ + __ ldub($mem$$Address, $dst$$Register, 3); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Short (16 bit signed) +instruct loadI2S(iRegI dst, indOffset13m7 mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDSH $mem+2,$dst\t! int -> short" %} + ins_encode %{ + __ ldsh($mem$$Address, $dst$$Register, 2); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Unsigned Short (16 bit UNsigned) +instruct loadI2US(iRegI dst, indOffset13m7 mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDUH $mem+2,$dst\t! int -> ushort/char" %} + ins_encode %{ + __ lduh($mem$$Address, $dst$$Register, 2); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer into a Long Register +instruct loadI2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSW $mem,$dst\t! int -> long" %} + ins_encode %{ + __ ldsw($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer with mask 0xFF into a Long Register +instruct loadI2L_immI_255(iRegL dst, indOffset13m7 mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUB $mem+3,$dst\t! int & 0xFF -> long" %} + ins_encode %{ + __ ldub($mem$$Address, $dst$$Register, 3); // LSB is index+3 on BE + %} + ins_pipe(iload_mem); +%} + +// Load Integer with mask 0xFFFF into a Long Register +instruct loadI2L_immI_65535(iRegL dst, indOffset13m7 mem, immI_65535 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUH $mem+2,$dst\t! 
int & 0xFFFF -> long" %} + ins_encode %{ + __ lduh($mem$$Address, $dst$$Register, 2); // LSW is index+2 on BE + %} + ins_pipe(iload_mem); +%} + +// Load Integer with a 12-bit mask into a Long Register +instruct loadI2L_immU12(iRegL dst, memory mem, immU12 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + + size(2*4); + format %{ "LDUW $mem,$dst\t! int & 12-bit mask -> long\n\t" + "AND $dst,$mask,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + __ lduw($mem$$Address, Rdst); + __ and3(Rdst, $mask$$constant, Rdst); + %} + ins_pipe(iload_mem); +%} + +// Load Integer with a 31-bit mask into a Long Register +instruct loadI2L_immU31(iRegL dst, memory mem, immU31 mask, iRegL tmp) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + effect(TEMP dst, TEMP tmp); + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + + format %{ "LDUW $mem,$dst\t! int & 31-bit mask -> long\n\t" + "SET $mask,$tmp\n\t" + "AND $dst,$tmp,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + __ lduw($mem$$Address, Rdst); + __ set($mask$$constant, Rtmp); + __ and3(Rdst, Rtmp, Rdst); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Integer into a Long Register +instruct loadUI2L(iRegL dst, memory mem, immL_32bits mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUW $mem,$dst\t! uint -> long" %} + ins_encode %{ + __ lduw($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Long - aligned +instruct loadL(iRegL dst, memory mem ) %{ + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDX $mem,$dst\t! 
long" %} + ins_encode %{ + __ ldx($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(iRegL dst, memory mem, o7RegI tmp) %{ + match(Set dst (LoadL_unaligned mem)); + effect(KILL tmp); + ins_cost(MEMORY_REF_COST*2+DEFAULT_COST); + format %{ "LDUW $mem+4,R_O7\t! misaligned long\n" + "\tLDUW $mem ,$dst\n" + "\tSLLX #32, $dst, $dst\n" + "\tOR $dst, R_O7, $dst" %} + opcode(Assembler::lduw_op3); + ins_encode(form3_mem_reg_long_unaligned_marshal( mem, dst )); + ins_pipe(iload_mem); +%} + +// Load Range +instruct loadRange(iRegI dst, memory mem) %{ + match(Set dst (LoadRange mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "LDUW $mem,$dst\t! range" %} + opcode(Assembler::lduw_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(iload_mem); +%} + +// Load Integer into %f register (for fitos/fitod) +instruct loadI_freg(regF dst, memory mem) %{ + match(Set dst (LoadI mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "LDF $mem,$dst\t! for fitos/fitod" %} + opcode(Assembler::ldf_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(floadF_mem); +%} + +// Load Pointer +instruct loadP(iRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDX $mem,$dst\t! ptr" %} + ins_encode %{ + __ ldx($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Compressed Pointer +instruct loadN(iRegN dst, memory mem) %{ + match(Set dst (LoadN mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDUW $mem,$dst\t! compressed ptr" %} + ins_encode %{ + __ lduw($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Klass Pointer +instruct loadKlass(iRegP dst, memory mem) %{ + match(Set dst (LoadKlass mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDX $mem,$dst\t! 
klass ptr" %} + ins_encode %{ + __ ldx($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(iRegN dst, memory mem) %{ + match(Set dst (LoadNKlass mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDUW $mem,$dst\t! compressed klass ptr" %} + ins_encode %{ + __ lduw($mem$$Address, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "LDDF $mem,$dst" %} + opcode(Assembler::lddf_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(floadD_mem); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD_low dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(MEMORY_REF_COST*2+DEFAULT_COST); + format %{ "LDF $mem ,$dst.hi\t! misaligned double\n" + "\tLDF $mem+4,$dst.lo\t!" %} + opcode(Assembler::ldf_op3); + ins_encode( form3_mem_reg_double_unaligned( mem, dst )); + ins_pipe(iload_mem); +%} + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "LDF $mem,$dst" %} + opcode(Assembler::ldf_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(floadF_mem); +%} + +// Load Constant +instruct loadConI( iRegI dst, immI src ) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 3/2); + format %{ "SET $src,$dst" %} + ins_encode( Set32(src, dst) ); + ins_pipe(ialu_hi_lo_reg); +%} + +instruct loadConI13( iRegI dst, immI13 src ) %{ + match(Set dst src); + + size(4); + format %{ "MOV $src,$dst" %} + ins_encode( Set13( src, dst ) ); + ins_pipe(ialu_imm); +%} + +instruct loadConP_set(iRegP dst, immP_set con) %{ + match(Set dst con); + ins_cost(DEFAULT_COST * 3/2); + format %{ "SET $con,$dst\t! 
ptr" %}
+  ins_encode %{
+    relocInfo::relocType constant_reloc = _opnds[1]->constant_reloc();
+    intptr_t val = $con$$constant;
+    if (constant_reloc == relocInfo::oop_type) {
+      __ set_oop_constant((jobject) val, $dst$$Register);
+    } else if (constant_reloc == relocInfo::metadata_type) {
+      __ set_metadata_constant((Metadata*)val, $dst$$Register);
+    } else { // non-oop pointers, e.g. card mark base, heap top
+      assert(constant_reloc == relocInfo::none, "unexpected reloc type");
+      __ set(val, $dst$$Register);
+    }
+  %}
+  ins_pipe(loadConP);
+%}
+
+// Load a pointer constant from the constant table. The table offset is run
+// through ensure_simm13_or_reg (with $dst as its register argument) and the
+// resulting RegisterOrConstant is used as the ld_ptr offset.
+instruct loadConP_load(iRegP dst, immP_load con) %{
+  match(Set dst con);
+  ins_cost(MEMORY_REF_COST);
+  format %{ "LD [$constanttablebase + $constantoffset],$dst\t! load from constant table: ptr=$con" %}
+  ins_encode %{
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
+    __ ld_ptr($constanttablebase, con_offset, $dst$$Register);
+  %}
+  ins_pipe(loadConP);
+%}
+
+// Materialize a non-oop pointer constant inline. Constants whose relocation
+// type is metadata_type go through set_metadata_constant; everything else
+// uses a plain set.
+instruct loadConP_no_oop_cheap(iRegP dst, immP_no_oop_cheap con) %{
+  match(Set dst con);
+  ins_cost(DEFAULT_COST * 3/2);
+  format %{ "SET $con,$dst\t! non-oop ptr" %}
+  ins_encode %{
+    if (_opnds[1]->constant_reloc() == relocInfo::metadata_type) {
+      __ set_metadata_constant((Metadata*)$con$$constant, $dst$$Register);
+    } else {
+      __ set($con$$constant, $dst$$Register);
+    }
+  %}
+  ins_pipe(loadConP);
+%}
+
+// Null pointer constant (immP0): clear the destination register.
+instruct loadConP0(iRegP dst, immP0 src) %{
+  match(Set dst src);
+
+  size(4);
+  format %{ "CLR $dst\t!ptr" %}
+  ins_encode %{
+    __ clr($dst$$Register);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Compressed null constant (immN0): clear the destination register.
+instruct loadConN0(iRegN dst, immN0 src) %{
+  match(Set dst src);
+
+  size(4);
+  format %{ "CLR $dst\t! compressed NULL ptr" %}
+  ins_encode %{
+    __ clr($dst$$Register);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Compressed oop constant, materialized with set_narrow_oop.
+instruct loadConN(iRegN dst, immN src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 3/2);
+  format %{ "SET $src,$dst\t! compressed ptr" %}
+  ins_encode %{
+    Register dst = $dst$$Register;
+    __ set_narrow_oop((jobject)$src$$constant, dst);
+  %}
+  ins_pipe(ialu_hi_lo_reg);
+%}
+
+// Compressed klass constant, materialized with set_narrow_klass.
+instruct loadConNKlass(iRegN dst, immNKlass src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 3/2);
+  format %{ "SET $src,$dst\t! compressed klass ptr" %}
+  ins_encode %{
+    Register dst = $dst$$Register;
+    __ set_narrow_klass((Klass*)$src$$constant, dst);
+  %}
+  ins_pipe(ialu_hi_lo_reg);
+%}
+
+// Materialize long value (predicated by immL_cheap).
+// The o7RegL temp is handed to set64 and declared KILLed.
+instruct loadConL_set64(iRegL dst, immL_cheap con, o7RegL tmp) %{
+  match(Set dst con);
+  effect(KILL tmp);
+  ins_cost(DEFAULT_COST * 3);
+  format %{ "SET64 $con,$dst KILL $tmp\t! cheap long" %}
+  ins_encode %{
+    __ set64($con$$constant, $dst$$Register, $tmp$$Register);
+  %}
+  ins_pipe(loadConL);
+%}
+
+// Load long value from constant table (predicated by immL_expensive).
+instruct loadConL_ldx(iRegL dst, immL_expensive con) %{
+  match(Set dst con);
+  ins_cost(MEMORY_REF_COST);
+  format %{ "LDX [$constanttablebase + $constantoffset],$dst\t! load from constant table: long=$con" %}
+  ins_encode %{
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
+    __ ldx($constanttablebase, con_offset, $dst$$Register);
+  %}
+  ins_pipe(loadConL);
+%}
+
+// Long zero constant (immL0); encoded with the Set13 encoding class.
+instruct loadConL0( iRegL dst, immL0 src ) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CLR $dst\t! long" %}
+  ins_encode( Set13( src, dst ) );
+  ins_pipe(ialu_imm);
+%}
+
+// Long constant matching immL13; same Set13 encoding as loadConL0 but with a
+// higher ins_cost.
+instruct loadConL13( iRegL dst, immL13 src ) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 2);
+
+  size(4);
+  format %{ "MOV $src,$dst\t! long" %}
+  ins_encode( Set13( src, dst ) );
+  ins_pipe(ialu_imm);
+%}
+
+// Load a float constant from the constant table. The o7RegI temp is declared
+// KILLed and passed to ensure_simm13_or_reg together with the table offset.
+instruct loadConF(regF dst, immF con, o7RegI tmp) %{
+  match(Set dst con);
+  effect(KILL tmp);
+  format %{ "LDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: float=$con" %}
+  ins_encode %{
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::S, $constanttablebase, con_offset, $dst$$FloatRegister);
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Load a double constant from the constant table; same offset handling as
+// loadConF, with the destination obtained via as_DoubleFloatRegister($dst$$reg).
+instruct loadConD(regD dst, immD con, o7RegI tmp) %{
+  match(Set dst con);
+  effect(KILL tmp);
+  format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: double=$con" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset($con), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Prefetch instructions for allocation.
+// Must be safe to execute with invalid address (cannot fault).
+
+// Selected when AllocatePrefetchInstr == 0.
+instruct prefetchAlloc( memory mem ) %{
+  predicate(AllocatePrefetchInstr == 0);
+  match( PrefetchAllocation mem );
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem,2\t! Prefetch allocation" %}
+  opcode(Assembler::prefetch_op3);
+  ins_encode( form3_mem_prefetch_write( mem ) );
+  ins_pipe(iload_mem);
+%}
+
+// Use BIS instruction to prefetch for allocation.
+// Could fault, need space at the end of TLAB.
+// Selected when AllocatePrefetchInstr == 1; emits an STXA of G0 with
+// ASI_ST_BLKINIT_PRIMARY.
+instruct prefetchAlloc_bis( iRegP dst ) %{
+  predicate(AllocatePrefetchInstr == 1);
+  match( PrefetchAllocation dst );
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "STXA [$dst]\t! // Prefetch allocation using BIS" %}
+  ins_encode %{
+    __ stxa(G0, $dst$$Register, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Next code is used for finding next cache line address to prefetch.
+instruct cacheLineAdr( iRegP dst, iRegP src, immL13 mask ) %{
+  match(Set dst (CastX2P (AndL (CastP2X src) mask)));
+  ins_cost(DEFAULT_COST);
+  size(4);
+
+  format %{ "AND $src,$mask,$dst\t! next cache line address" %}
+  ins_encode %{
+    __ and3($src$$Register, $mask$$constant, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+//----------Store Instructions-------------------------------------------------
+// Each store comes in a register-source form and, where an immediate-zero
+// operand exists, a "0" form that encodes R_G0 as the source instead.
+
+// Store Byte
+instruct storeB(memory mem, iRegI src) %{
+  match(Set mem (StoreB mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STB $src,$mem\t! byte" %}
+  opcode(Assembler::stb_op3);
+  ins_encode(simple_form3_mem_reg( mem, src ) );
+  ins_pipe(istore_mem_reg);
+%}
+
+instruct storeB0(memory mem, immI0 src) %{
+  match(Set mem (StoreB mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STB $src,$mem\t! byte" %}
+  opcode(Assembler::stb_op3);
+  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
+  ins_pipe(istore_mem_zero);
+%}
+
+// Card-mark store of zero (StoreCM); same byte-store encoding as storeB0.
+instruct storeCM0(memory mem, immI0 src) %{
+  match(Set mem (StoreCM mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STB $src,$mem\t! CMS card-mark byte 0" %}
+  opcode(Assembler::stb_op3);
+  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
+  ins_pipe(istore_mem_zero);
+%}
+
+// Store Char/Short
+instruct storeC(memory mem, iRegI src) %{
+  match(Set mem (StoreC mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STH $src,$mem\t! short" %}
+  opcode(Assembler::sth_op3);
+  ins_encode(simple_form3_mem_reg( mem, src ) );
+  ins_pipe(istore_mem_reg);
+%}
+
+instruct storeC0(memory mem, immI0 src) %{
+  match(Set mem (StoreC mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STH $src,$mem\t! short" %}
+  opcode(Assembler::sth_op3);
+  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
+  ins_pipe(istore_mem_zero);
+%}
+
+// Store Integer
+instruct storeI(memory mem, iRegI src) %{
+  match(Set mem (StoreI mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW $src,$mem" %}
+  opcode(Assembler::stw_op3);
+  ins_encode(simple_form3_mem_reg( mem, src ) );
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Long
+instruct storeL(memory mem, iRegL src) %{
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+  format %{ "STX $src,$mem\t! long" %}
+  opcode(Assembler::stx_op3);
+  ins_encode(simple_form3_mem_reg( mem, src ) );
+  ins_pipe(istore_mem_reg);
+%}
+
+instruct storeI0(memory mem, immI0 src) %{
+  match(Set mem (StoreI mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW $src,$mem" %}
+  opcode(Assembler::stw_op3);
+  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
+  ins_pipe(istore_mem_zero);
+%}
+
+instruct storeL0(memory mem, immL0 src) %{
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STX $src,$mem" %}
+  opcode(Assembler::stx_op3);
+  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
+  ins_pipe(istore_mem_zero);
+%}
+
+// Store Integer from float register (used after fstoi)
+instruct storeI_Freg(memory mem, regF src) %{
+  match(Set mem (StoreI mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STF $src,$mem\t! after fstoi/fdtoi" %}
+  opcode(Assembler::stf_op3);
+  ins_encode(simple_form3_mem_reg( mem, src ) );
+  ins_pipe(fstoreF_mem_reg);
+%}
+
+// Store Pointer
+instruct storeP(memory dst, sp_ptr_RegP src) %{
+  match(Set dst (StoreP dst src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STX $src,$dst\t! ptr" %}
+  opcode(Assembler::stx_op3, 0, REGP_OP);
+  ins_encode( form3_mem_reg( dst, src ) );
+  ins_pipe(istore_mem_spORreg);
+%}
+
+instruct storeP0(memory dst, immP0 src) %{
+  match(Set dst (StoreP dst src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STX $src,$dst\t! ptr" %}
+  opcode(Assembler::stx_op3, 0, REGP_OP);
+  ins_encode( form3_mem_reg( dst, R_G0 ) );
+  ins_pipe(istore_mem_zero);
+%}
+
+// Store Compressed Pointer
+// The compressed stores pick the addressing form by hand: base+index when the
+// memory operand has an index register other than G0, base+disp otherwise.
+instruct storeN(memory dst, iRegN src) %{
+  match(Set dst (StoreN dst src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "STW $src,$dst\t! compressed ptr" %}
+  ins_encode %{
+    Register base = as_Register($dst$$base);
+    Register index = as_Register($dst$$index);
+    Register src = $src$$Register;
+    if (index != G0) {
+      __ stw(src, base, index);
+    } else {
+      __ stw(src, base, $dst$$disp);
+    }
+  %}
+  ins_pipe(istore_mem_spORreg);
+%}
+
+instruct storeNKlass(memory dst, iRegN src) %{
+  match(Set dst (StoreNKlass dst src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "STW $src,$dst\t! compressed klass ptr" %}
+  ins_encode %{
+    Register base = as_Register($dst$$base);
+    Register index = as_Register($dst$$index);
+    Register src = $src$$Register;
+    if (index != G0) {
+      __ stw(src, base, index);
+    } else {
+      __ stw(src, base, $dst$$disp);
+    }
+  %}
+  ins_pipe(istore_mem_spORreg);
+%}
+
+// Store of a compressed null (immN0). The source is named explicitly as G0
+// rather than passed as a bare integer literal, matching the G0 comparison
+// used for the index register just below.
+instruct storeN0(memory dst, immN0 src) %{
+  match(Set dst (StoreN dst src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "STW $src,$dst\t! compressed ptr" %}
+  ins_encode %{
+    Register base = as_Register($dst$$base);
+    Register index = as_Register($dst$$index);
+    if (index != G0) {
+      __ stw(G0, base, index);
+    } else {
+      __ stw(G0, base, $dst$$disp);
+    }
+  %}
+  ins_pipe(istore_mem_zero);
+%}
+
+// Store Double
+instruct storeD( memory mem, regD src) %{
+  match(Set mem (StoreD mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STDF $src,$mem" %}
+  opcode(Assembler::stdf_op3);
+  ins_encode(simple_form3_mem_reg( mem, src ) );
+  ins_pipe(fstoreD_mem_reg);
+%}
+
+// Double zero (immD0) is stored with the integer STX opcode and R_G0.
+instruct storeD0( memory mem, immD0 src) %{
+  match(Set mem (StoreD mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STX $src,$mem" %}
+  opcode(Assembler::stx_op3);
+  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+// Store Float
+instruct storeF( memory mem, regF src) %{
+  match(Set mem (StoreF mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STF $src,$mem" %}
+  opcode(Assembler::stf_op3);
+  ins_encode(simple_form3_mem_reg( mem, src ) );
+  ins_pipe(fstoreF_mem_reg);
+%}
+
+// Float zero (immF0) is stored with the integer STW opcode and R_G0.
+instruct storeF0( memory mem, immF0 src) %{
+  match(Set mem (StoreF mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW $src,$mem\t! storeF0" %}
+  opcode(Assembler::stw_op3);
+  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
+  ins_pipe(fstoreF_mem_zero);
+%}
+
+// Convert oop pointer into compressed form
+// The predicates split EncodeP/DecodeN into a general form and a *_not_null
+// form based on the node's bottom type.
+instruct encodeHeapOop(iRegN dst, iRegP src) %{
+  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
+  match(Set dst (EncodeP src));
+  format %{ "encode_heap_oop $src, $dst" %}
+  ins_encode %{
+    __ encode_heap_oop($src$$Register, $dst$$Register);
+  %}
+  ins_avoid_back_to_back(CompressedOops::base() == NULL ? AVOID_NONE : AVOID_BEFORE);
+  ins_pipe(ialu_reg);
+%}
+
+instruct encodeHeapOop_not_null(iRegN dst, iRegP src) %{
+  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
+  match(Set dst (EncodeP src));
+  format %{ "encode_heap_oop_not_null $src, $dst" %}
+  ins_encode %{
+    __ encode_heap_oop_not_null($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop(iRegP dst, iRegN src) %{
+  predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
+            n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
+  match(Set dst (DecodeN src));
+  format %{ "decode_heap_oop $src, $dst" %}
+  ins_encode %{
+    __ decode_heap_oop($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop_not_null(iRegP dst, iRegN src) %{
+  predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
+            n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
+  match(Set dst (DecodeN src));
+  format %{ "decode_heap_oop_not_null $src, $dst" %}
+  ins_encode %{
+    __ decode_heap_oop_not_null($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct encodeKlass_not_null(iRegN dst, iRegP src) %{
+  match(Set dst (EncodePKlass src));
+  format %{ "encode_klass_not_null $src, $dst" %}
+  ins_encode %{
+    __ encode_klass_not_null($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeKlass_not_null(iRegP dst, iRegN src) %{
+  match(Set dst (DecodeNKlass src));
+  format %{ "decode_klass_not_null $src, $dst" %}
+  ins_encode %{
+    __ decode_klass_not_null($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+//----------MemBar Instructions-----------------------------------------------
+// Memory barrier flavors
+// Several flavors below have an empty ins_encode and size(0).
+
+instruct membar_acquire() %{
+  match(MemBarAcquire);
+  match(LoadFence);
+  ins_cost(4*MEMORY_REF_COST);
+
+  size(0);
+  format %{ "MEMBAR-acquire" %}
+  ins_encode( enc_membar_acquire );
+  ins_pipe(long_memory_op);
+%}
+
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+  ins_cost(0);
+
+  size(0);
+  format %{ "!MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_release() %{
+  match(MemBarRelease);
+  match(StoreFence);
+  ins_cost(4*MEMORY_REF_COST);
+
+  size(0);
+  format %{ "MEMBAR-release" %}
+  ins_encode( enc_membar_release );
+  ins_pipe(long_memory_op);
+%}
+
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock);
+  ins_cost(0);
+
+  size(0);
+  format %{ "!MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(4*MEMORY_REF_COST);
+
+  size(4);
+  format %{ "MEMBAR-volatile" %}
+  ins_encode( enc_membar_volatile );
+  ins_pipe(long_memory_op);
+%}
+
+// Zero-cost alternative for MemBarVolatile, eligible only when
+// Matcher::post_store_load_barrier(n) holds.
+instruct unnecessary_membar_volatile() %{
+  match(MemBarVolatile);
+  predicate(Matcher::post_store_load_barrier(n));
+  ins_cost(0);
+
+  size(0);
+  format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+
+  size(0);
+  format %{ "!MEMBAR-storestore (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+//----------Register Move Instructions-----------------------------------------
+instruct roundDouble_nop(regD dst) %{
+  match(Set dst (RoundDouble dst));
+  ins_cost(0);
+  // SPARC results are already "rounded" (i.e., normal-format IEEE)
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+
+instruct roundFloat_nop(regF dst) %{
+  match(Set dst (RoundFloat dst));
+  ins_cost(0);
+  // SPARC results are already "rounded" (i.e., normal-format IEEE)
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+
+// Cast Index to Pointer for unsafe natives
+instruct castX2P(iRegX src, iRegP dst) %{
+  match(Set dst (CastX2P src));
+
+  format %{ "MOV $src,$dst\t! IntX->Ptr" %}
+  ins_encode( form3_g0_rs2_rd_move( src, dst ) );
+  ins_pipe(ialu_reg);
+%}
+
+// Cast Pointer to Index for unsafe natives
+instruct castP2X(iRegP src, iRegX dst) %{
+  match(Set dst (CastP2X src));
+
+  format %{ "MOV $src,$dst\t! Ptr->IntX" %}
+  ins_encode( form3_g0_rs2_rd_move( src, dst ) );
+  ins_pipe(ialu_reg);
+%}
+
+// Chain rules moving float/double values between registers and stack slots.
+instruct stfSSD(stackSlotD stkSlot, regD src) %{
+  // %%%% TO DO: Tell the coalescer that this kind of node is a copy!
+  match(Set stkSlot src); // chain rule
+  ins_cost(MEMORY_REF_COST);
+  format %{ "STDF $src,$stkSlot\t!stk" %}
+  opcode(Assembler::stdf_op3);
+  ins_encode(simple_form3_mem_reg(stkSlot, src));
+  ins_pipe(fstoreD_stk_reg);
+%}
+
+instruct ldfSSD(regD dst, stackSlotD stkSlot) %{
+  // %%%% TO DO: Tell the coalescer that this kind of node is a copy!
+  match(Set dst stkSlot); // chain rule
+  ins_cost(MEMORY_REF_COST);
+  format %{ "LDDF $stkSlot,$dst\t!stk" %}
+  opcode(Assembler::lddf_op3);
+  ins_encode(simple_form3_mem_reg(stkSlot, dst));
+  ins_pipe(floadD_stk);
+%}
+
+instruct stfSSF(stackSlotF stkSlot, regF src) %{
+  // %%%% TO DO: Tell the coalescer that this kind of node is a copy!
+  match(Set stkSlot src); // chain rule
+  ins_cost(MEMORY_REF_COST);
+  format %{ "STF $src,$stkSlot\t!stk" %}
+  opcode(Assembler::stf_op3);
+  ins_encode(simple_form3_mem_reg(stkSlot, src));
+  ins_pipe(fstoreF_stk_reg);
+%}
+
+//----------Conditional Move---------------------------------------------------
+// Naming: cmov<value type><flags kind>_<reg|imm>. The flags kind selects the
+// condition-code argument passed to the encoding (ptr_cc, icc, or the fcc
+// operand itself).
+
+// Conditional move
+instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
+  ins_cost(150);
+  format %{ "MOV$cmp $pcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIP_imm(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI11 src) %{
+  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
+  ins_cost(140);
+  format %{ "MOV$cmp $pcc,$src,$dst" %}
+  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovII_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovII_imm(cmpOp cmp, flagsReg icc, iRegI dst, immI11 src) %{
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst" %}
+  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIIu_imm(cmpOpU cmp, flagsRegU icc, iRegI dst, immI11 src) %{
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst" %}
+  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovIF_reg(cmpOpF cmp, flagsRegF fcc, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp fcc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $fcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIF_imm(cmpOpF cmp, flagsRegF fcc, iRegI dst, immI11 src) %{
+  match(Set dst (CMoveI (Binary cmp fcc) (Binary dst src)));
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp $fcc,$src,$dst" %}
+  ins_encode( enc_cmov_imm_f(cmp,dst,src, fcc) );
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move for RegN. Only cmov(reg,reg).
+instruct cmovNP_reg(cmpOpP cmp, flagsRegP pcc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp pcc) (Binary dst src)));
+  ins_cost(150);
+  format %{ "MOV$cmp $pcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_reg);
+%}
+
+// This instruction also works with CmpN so we don't need cmovNN_reg.
+instruct cmovNI_reg(cmpOp cmp, flagsReg icc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+// This instruction also works with CmpN so we don't need cmovNN_reg.
+instruct cmovNIu_reg(cmpOpU cmp, flagsRegU icc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovNF_reg(cmpOpF cmp, flagsRegF fcc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp fcc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $fcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
+  ins_pipe(ialu_reg);
+%}
+
+// Conditional move
+// Pointer destinations. The immediate forms only accept immP0, i.e. a
+// conditional move of the null pointer.
+instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
+  ins_cost(150);
+  format %{ "MOV$cmp $pcc,$src,$dst\t! ptr" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
+  ins_cost(140);
+  format %{ "MOV$cmp $pcc,$src,$dst\t! ptr" %}
+  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_imm);
+%}
+
+// This instruction also works with CmpN so we don't need cmovPN_reg.
+instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst\t! ptr" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPIu_reg(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst\t! ptr" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);
+
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst\t! ptr" %}
+  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);
+
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst\t! ptr" %}
+  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_imm);
+%}
+
+// Register-source move on float condition codes: uses the register pipeline
+// class, like the other *F_reg integer-register moves (cmovNF_reg, cmovLF_reg).
+instruct cmovPF_reg(cmpOpF cmp, flagsRegF fcc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp fcc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $fcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPF_imm(cmpOpF cmp, flagsRegF fcc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp fcc) (Binary dst src)));
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp $fcc,$src,$dst" %}
+  ins_encode( enc_cmov_imm_f(cmp,dst,src, fcc) );
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move
+// Single-precision float destinations (regF). The integer/pointer-flag forms
+// use opcode 0x101 and are printed as FMOVS.
+instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src)));
+  ins_cost(150);
+  opcode(0x101);
+  format %{ "FMOVS$cmp $pcc,$src,$dst" %}
+  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "FMOVS$cmp $icc,$src,$dst" %}
+  opcode(0x101);
+  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "FMOVS$cmp $icc,$src,$dst" %}
+  opcode(0x101);
+  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional move,
+// Float move on float condition codes (opcode 0x1); regF operands, so it
+// shares the float-move pipeline class with the other CMoveF forms.
+instruct cmovFF_reg(cmpOpF cmp, flagsRegF fcc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp fcc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "FMOVF$cmp $fcc,$src,$dst" %}
+  opcode(0x1);
+  ins_encode( enc_cmovff_reg(cmp,fcc,dst,src) );
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional move
+// Double-precision destinations (regD). The integer/pointer-flag forms use
+// opcode 0x102 and are printed as FMOVD.
+instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  opcode(0x102);
+  format %{ "FMOVD$cmp $pcc,$src,$dst" %}
+  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(int_conditional_double_move);
+%}
+
+instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "FMOVD$cmp $icc,$src,$dst" %}
+  opcode(0x102);
+  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(int_conditional_double_move);
+%}
+
+instruct cmovDIu_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "FMOVD$cmp $icc,$src,$dst" %}
+  opcode(0x102);
+  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(int_conditional_double_move);
+%}
+
+// Conditional move,
+// Double move on float condition codes (opcode 0x2).
+instruct cmovDF_reg(cmpOpF cmp, flagsRegF fcc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp fcc) (Binary dst src)));
+  ins_cost(150);
+  size(4);
+  format %{ "FMOVD$cmp $fcc,$src,$dst" %}
+  opcode(0x2);
+  ins_encode( enc_cmovff_reg(cmp,fcc,dst,src) );
+  ins_pipe(int_conditional_double_move);
+%}
+
+// Conditional move
+// Long destinations (iRegL).
+instruct cmovLP_reg(cmpOpP cmp, flagsRegP pcc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
+  ins_cost(150);
+  format %{ "MOV$cmp $pcc,$src,$dst\t! long" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovLP_imm(cmpOpP cmp, flagsRegP pcc, iRegL dst, immI11 src) %{
+  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
+  ins_cost(140);
+  format %{ "MOV$cmp $pcc,$src,$dst\t! long" %}
+  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLI_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst\t! long" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+
+instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "MOV$cmp $icc,$src,$dst\t! long" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+
+instruct cmovLF_reg(cmpOpF cmp, flagsRegF fcc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp fcc) (Binary dst src)));
+  ins_cost(150);
+
+  size(4);
+  format %{ "MOV$cmp $fcc,$src,$dst\t! long" %}
+  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
+  ins_pipe(ialu_reg);
+%}
+
+
+
+//----------OS and Locking Instructions----------------------------------------
+
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
+instruct tlsLoadP(g2RegP dst) %{
+  match(Set dst (ThreadLocal));
+
+  size(0);
+  ins_cost(0);
+  format %{ "# TLS is in G2" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe(ialu_none);
+%}
+
+// Cast nodes: each is matched in place on $dst (Set dst (CastXX dst)) and has
+// an empty encoding.
+instruct checkCastPP( iRegP dst ) %{
+  match(Set dst (CheckCastPP dst));
+
+  size(0);
+  format %{ "# checkcastPP of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe(empty);
+%}
+
+
+instruct castPP( iRegP dst ) %{
+  match(Set dst (CastPP dst));
+  format %{ "# castPP of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe(empty);
+%}
+
+instruct castII( iRegI dst ) %{
+  match(Set dst (CastII dst));
+  format %{ "# castII of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_cost(0);
+  ins_pipe(empty);
+%}
+
+instruct castLL( iRegL dst ) %{
+  match(Set dst (CastLL dst));
+  format %{ "# castLL of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_cost(0);
+  ins_pipe(empty);
+%}
+
+instruct castFF( regF dst ) %{
+  match(Set dst (CastFF dst));
+  format %{ "# castFF of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_cost(0);
+  ins_pipe(empty);
+%}
+
+instruct castDD( regD dst ) %{
+  match(Set dst (CastDD dst));
+  format %{ "# castDD of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_cost(0);
+  ins_pipe(empty);
+%}
+
+// NOTE(review): CastVV is matched on an iRegL operand here; no vector register
+// class is visible in this part of the file -- confirm this is intended.
+instruct castVV( iRegL dst ) %{
+  match(Set dst (CastVV dst));
+  format %{ "# castVV of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_cost(0);
+  ins_pipe(empty);
+%}
+
+//----------Arithmetic Instructions--------------------------------------------
+// Addition Instructions
+// Register Addition
+instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (AddI src1 src2));
+
+  size(4);
+  format %{ "ADD $src1,$src2,$dst" %}
+  ins_encode %{
+    __ add($src1$$Register, $src2$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Addition
+instruct addI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
+  match(Set dst (AddI src1 src2));
+
+  size(4);
+  format %{ "ADD $src1,$src2,$dst" %}
+  opcode(Assembler::add_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Pointer Register Addition
+instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{
+  match(Set dst (AddP src1 src2));
+
+  size(4);
+  format %{ "ADD $src1,$src2,$dst" %}
+  opcode(Assembler::add_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Pointer Immediate Addition
+instruct addP_reg_imm13(iRegP dst, iRegP src1, immX13 src2) %{
+  match(Set dst (AddP src1 src2));
+
+  size(4);
+  format %{ "ADD $src1,$src2,$dst" %}
+  opcode(Assembler::add_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  match(Set dst (AddL src1 src2));
+
+  size(4);
+  format %{ "ADD $src1,$src2,$dst\t! long" %}
+  opcode(Assembler::add_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{
+  match(Set dst (AddL src1 con));
+
+  size(4);
+  format %{ "ADD $src1,$con,$dst" %}
+  opcode(Assembler::add_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( src1, con, dst ) );
+  ins_pipe(ialu_reg_imm);
+%}
+
+//----------Conditional_store--------------------------------------------------
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flags (EQ) on success. Implemented with a CASA on Sparc.
+
+// LoadP-locked. Same as a regular pointer load when used with a compare-swap
+instruct loadPLocked(iRegP dst, memory mem) %{
+  match(Set dst (LoadPLocked mem));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LDX $mem,$dst\t! ptr" %}
+  opcode(Assembler::ldx_op3, 0, REGP_OP);
+  ins_encode( form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mem);
+%}
+
+// The three conditional stores below produce a flags result (Set pcc/icc/xcc),
+// bind newval to a g3Reg* operand and declare it KILLed; all use enc_cas.
+instruct storePConditional( iRegP heap_top_ptr, iRegP oldval, g3RegP newval, flagsRegP pcc ) %{
+  match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval)));
+  effect( KILL newval );
+  format %{ "CASA [$heap_top_ptr],$oldval,R_G3\t! If $oldval==[$heap_top_ptr] Then store R_G3 into [$heap_top_ptr], set R_G3=[$heap_top_ptr] in any case\n\t"
+            "CMP R_G3,$oldval\t\t! See if we made progress" %}
+  ins_encode( enc_cas(heap_top_ptr,oldval,newval) );
+  ins_pipe( long_memory_op );
+%}
+
+// Conditional-store of an int value.
+instruct storeIConditional( iRegP mem_ptr, iRegI oldval, g3RegI newval, flagsReg icc ) %{
+  match(Set icc (StoreIConditional mem_ptr (Binary oldval newval)));
+  effect( KILL newval );
+  format %{ "CASA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t"
+            "CMP $oldval,$newval\t\t! See if we made progress" %}
+  ins_encode( enc_cas(mem_ptr,oldval,newval) );
+  ins_pipe( long_memory_op );
+%}
+
+// Conditional-store of a long value.
+instruct storeLConditional( iRegP mem_ptr, iRegL oldval, g3RegL newval, flagsRegL xcc ) %{
+  match(Set xcc (StoreLConditional mem_ptr (Binary oldval newval)));
+  effect( KILL newval );
+  format %{ "CASXA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t"
+            "CMP $oldval,$newval\t\t! See if we made progress" %}
+  ins_encode( enc_cas(mem_ptr,oldval,newval) );
+  ins_pipe( long_memory_op );
+%}
+
+// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
+
+// Boolean-result compare-and-swap. Each form matches both the strong and the
+// Weak node, KILLs the o7RegI temp and the condition codes, and (per the
+// format text) copies newval to O7, performs the CAS, compares, then sets
+// $res to 1 and clears it with MOVne on mismatch.
+instruct compareAndSwapL_bool(iRegP mem_ptr, iRegL oldval, iRegL newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
+  predicate(VM_Version::supports_cx8());
+  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr, KILL ccr, KILL tmp1);
+  format %{
+            "MOV $newval,O7\n\t"
+            "CASXA [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
+            "CMP $oldval,O7\t\t! See if we made progress\n\t"
+            "MOV 1,$res\n\t"
+            "MOVne xcc,R_G0,$res"
+  %}
+  ins_encode( enc_casx(mem_ptr, oldval, newval),
+              enc_lflags_ne_to_boolean(res) );
+  ins_pipe( long_memory_op );
+%}
+
+
+instruct compareAndSwapI_bool(iRegP mem_ptr, iRegI oldval, iRegI newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
+  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr, KILL ccr, KILL tmp1);
+  format %{
+            "MOV $newval,O7\n\t"
+            "CASA [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
+            "CMP $oldval,O7\t\t! See if we made progress\n\t"
+            "MOV 1,$res\n\t"
+            "MOVne icc,R_G0,$res"
+  %}
+  ins_encode( enc_casi(mem_ptr, oldval, newval),
+              enc_iflags_ne_to_boolean(res) );
+  ins_pipe( long_memory_op );
+%}
+
+// Pointer CAS uses the same 64-bit encodings (enc_casx / enc_lflags_ne_to_boolean)
+// as compareAndSwapL_bool.
+instruct compareAndSwapP_bool(iRegP mem_ptr, iRegP oldval, iRegP newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
+  predicate(VM_Version::supports_cx8());
+  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr, KILL ccr, KILL tmp1);
+  format %{
+            "MOV $newval,O7\n\t"
+            "CASA_PTR [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
+            "CMP $oldval,O7\t\t! See if we made progress\n\t"
+            "MOV 1,$res\n\t"
+            "MOVne xcc,R_G0,$res"
+  %}
+  ins_encode( enc_casx(mem_ptr, oldval, newval),
+              enc_lflags_ne_to_boolean(res) );
+  ins_pipe( long_memory_op );
+%}
+
+// Compressed-pointer CAS uses the same 32-bit encodings (enc_casi /
+// enc_iflags_ne_to_boolean) as compareAndSwapI_bool.
+instruct compareAndSwapN_bool(iRegP mem_ptr, iRegN oldval, iRegN newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
+  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr, KILL ccr, KILL tmp1);
+  format %{
+            "MOV $newval,O7\n\t"
+            "CASA [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
+            "CMP $oldval,O7\t\t! See if we made progress\n\t"
+            "MOV 1,$res\n\t"
+            "MOVne icc,R_G0,$res"
+  %}
+  ins_encode( enc_casi(mem_ptr, oldval, newval),
+              enc_iflags_ne_to_boolean(res) );
+  ins_pipe( long_memory_op );
+%}
+
+// Compare-and-exchange: newval is both an input and the result register
+// (Set newval ...). I/N use enc_casi_exch; L/P use enc_casx_exch.
+instruct compareAndExchangeI(iRegP mem_ptr, iRegI oldval, iRegI newval)
+%{
+  match(Set newval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr );
+
+  format %{
+            "CASA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr] and set $newval=[$mem_ptr]\n\t"
+  %}
+  ins_encode( enc_casi_exch(mem_ptr, oldval, newval) );
+  ins_pipe( long_memory_op );
+%}
+
+instruct compareAndExchangeL(iRegP mem_ptr, iRegL oldval, iRegL newval)
+%{
+  match(Set newval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr );
+
+  format %{
+            "CASXA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr] and set $newval=[$mem_ptr]\n\t"
+  %}
+  ins_encode( enc_casx_exch(mem_ptr, oldval, newval) );
+  ins_pipe( long_memory_op );
+%}
+
+instruct compareAndExchangeP(iRegP mem_ptr, iRegP oldval, iRegP newval)
+%{
+  match(Set newval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr );
+
+  format %{
+            "CASXA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr] and set $newval=[$mem_ptr]\n\t"
+  %}
+  ins_encode( enc_casx_exch(mem_ptr, oldval, newval) );
+  ins_pipe( long_memory_op );
+%}
+
+instruct compareAndExchangeN(iRegP mem_ptr, iRegN oldval, iRegN newval)
+%{
+  match(Set newval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
+  effect( USE mem_ptr );
+
+  format %{
+            "CASA [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr] and set $newval=[$mem_ptr]\n\t"
+  %}
+  ins_encode( enc_casi_exch(mem_ptr, oldval, newval) );
+  ins_pipe( long_memory_op );
+%}
+
+// GetAndSet for int and compressed pointer: a single SWAP with newval as both
+// the value to store and the result register.
+instruct xchgI( memory mem, iRegI newval) %{
+  match(Set newval (GetAndSetI mem newval));
+  format %{ "SWAP [$mem],$newval" %}
+  size(4);
+  ins_encode %{
+    __ swap($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+
+instruct xchgN( memory mem, iRegN newval) %{
+  match(Set newval (GetAndSetN mem newval));
+  format %{ "SWAP [$mem],$newval" %}
+  size(4);
+  ins_encode %{
+    __ swap($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+//---------------------
+// Subtraction Instructions
+// Register Subtraction
+instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (SubI src1 src2));
+
+  size(4);
+  format %{ "SUB $src1,$src2,$dst" %}
+  opcode(Assembler::sub_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+instruct subI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
+  match(Set dst (SubI src1 src2));
+
+  size(4);
+  format %{ "SUB $src1,$src2,$dst" %}
+  opcode(Assembler::sub_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Negation: 0 - src2, encoded as a subtract with R_G0 as the first source.
+instruct subI_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{
+  match(Set dst (SubI zero src2));
+
+  size(4);
+  format %{ "NEG $src2,$dst" %}
+  opcode(Assembler::sub_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( R_G0, src2, dst ) );
+  ins_pipe(ialu_zero_reg);
+%}
+
+// Long subtraction
+instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  match(Set dst (SubL src1 src2));
+
+  size(4);
+  format %{ "SUB $src1,$src2,$dst\t! 
long" %} + opcode(Assembler::sub_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Immediate Subtraction +instruct subL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{ + match(Set dst (SubL src1 con)); + + size(4); + format %{ "SUB $src1,$con,$dst\t! long" %} + opcode(Assembler::sub_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, con, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Long negation +instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2) %{ + match(Set dst (SubL zero src2)); + + size(4); + format %{ "NEG $src2,$dst\t! long" %} + opcode(Assembler::sub_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( R_G0, src2, dst ) ); + ins_pipe(ialu_zero_reg); +%} + +// Multiplication Instructions +// Integer Multiplication +// Register Multiplication +instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (MulI src1 src2)); + + size(4); + format %{ "MULX $src1,$src2,$dst" %} + opcode(Assembler::mulx_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(imul_reg_reg); +%} + +// Immediate Multiplication +instruct mulI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{ + match(Set dst (MulI src1 src2)); + + size(4); + format %{ "MULX $src1,$src2,$dst" %} + opcode(Assembler::mulx_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(imul_reg_imm); +%} + +instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (MulL src1 src2)); + ins_cost(DEFAULT_COST * 5); + size(4); + format %{ "MULX $src1,$src2,$dst\t! 
long" %} + opcode(Assembler::mulx_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(mulL_reg_reg); +%} + +// Immediate Multiplication +instruct mulL_reg_imm13(iRegL dst, iRegL src1, immL13 src2) %{ + match(Set dst (MulL src1 src2)); + ins_cost(DEFAULT_COST * 5); + size(4); + format %{ "MULX $src1,$src2,$dst" %} + opcode(Assembler::mulx_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(mulL_reg_imm); +%} + +// Integer Division +// Register Division +instruct divI_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{ + match(Set dst (DivI src1 src2)); + ins_cost((2+71)*DEFAULT_COST); + + format %{ "SRA $src2,0,$src2\n\t" + "SRA $src1,0,$src1\n\t" + "SDIVX $src1,$src2,$dst" %} + ins_encode( idiv_reg( src1, src2, dst ) ); + ins_pipe(sdiv_reg_reg); +%} + +// Immediate Division +instruct divI_reg_imm13(iRegI dst, iRegIsafe src1, immI13 src2) %{ + match(Set dst (DivI src1 src2)); + ins_cost((2+71)*DEFAULT_COST); + + format %{ "SRA $src1,0,$src1\n\t" + "SDIVX $src1,$src2,$dst" %} + ins_encode( idiv_imm( src1, src2, dst ) ); + ins_pipe(sdiv_reg_imm); +%} + +//----------Div-By-10-Expansion------------------------------------------------ +// Extract hi bits of a 32x32->64 bit multiply. +// Expand rule only, not matched +instruct mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2 ) %{ + effect( DEF dst, USE src1, USE src2 ); + format %{ "MULX $src1,$src2,$dst\t! Used in div-by-10\n\t" + "SRLX $dst,#32,$dst\t\t! Extract only hi word of result" %} + ins_encode( enc_mul_hi(dst,src1,src2)); + ins_pipe(sdiv_reg_reg); +%} + +// Magic constant, reciprocal of 10 +instruct loadConI_x66666667(iRegIsafe dst) %{ + effect( DEF dst ); + + size(8); + format %{ "SET 0x66666667,$dst\t! 
Used in div-by-10" %} + ins_encode( Set32(0x66666667, dst) ); + ins_pipe(ialu_hi_lo_reg); +%} + +// Register Shift Right Arithmetic Long by 32-63 +instruct sra_31( iRegI dst, iRegI src ) %{ + effect( DEF dst, USE src ); + format %{ "SRA $src,31,$dst\t! Used in div-by-10" %} + ins_encode( form3_rs1_rd_copysign_hi(src,dst) ); + ins_pipe(ialu_reg_reg); +%} + +// Arithmetic Shift Right by 8-bit immediate +instruct sra_reg_2( iRegI dst, iRegI src ) %{ + effect( DEF dst, USE src ); + format %{ "SRA $src,2,$dst\t! Used in div-by-10" %} + opcode(Assembler::sra_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src, 0x2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Integer DIV with 10 +instruct divI_10( iRegI dst, iRegIsafe src, immI10 div ) %{ + match(Set dst (DivI src div)); + ins_cost((6+6)*DEFAULT_COST); + expand %{ + iRegIsafe tmp1; // Killed temps; + iRegIsafe tmp2; // Killed temps; + iRegI tmp3; // Killed temps; + iRegI tmp4; // Killed temps; + loadConI_x66666667( tmp1 ); // SET 0x66666667 -> tmp1 + mul_hi( tmp2, src, tmp1 ); // MUL hibits(src * tmp1) -> tmp2 + sra_31( tmp3, src ); // SRA src,31 -> tmp3 + sra_reg_2( tmp4, tmp2 ); // SRA tmp2,2 -> tmp4 + subI_reg_reg( dst,tmp4,tmp3); // SUB tmp4 - tmp3 -> dst + %} +%} + +// Register Long Division +instruct divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (DivL src1 src2)); + ins_cost(DEFAULT_COST*71); + size(4); + format %{ "SDIVX $src1,$src2,$dst\t! long" %} + opcode(Assembler::sdivx_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(divL_reg_reg); +%} + +// Register Long Division +instruct divL_reg_imm13(iRegL dst, iRegL src1, immL13 src2) %{ + match(Set dst (DivL src1 src2)); + ins_cost(DEFAULT_COST*71); + size(4); + format %{ "SDIVX $src1,$src2,$dst\t! 
long" %} + opcode(Assembler::sdivx_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(divL_reg_imm); +%} + +// Integer Remainder +// Register Remainder +instruct modI_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2, o7RegP temp, flagsReg ccr ) %{ + match(Set dst (ModI src1 src2)); + effect( KILL ccr, KILL temp); + + format %{ "SREM $src1,$src2,$dst" %} + ins_encode( irem_reg(src1, src2, dst, temp) ); + ins_pipe(sdiv_reg_reg); +%} + +// Immediate Remainder +instruct modI_reg_imm13(iRegI dst, iRegIsafe src1, immI13 src2, o7RegP temp, flagsReg ccr ) %{ + match(Set dst (ModI src1 src2)); + effect( KILL ccr, KILL temp); + + format %{ "SREM $src1,$src2,$dst" %} + ins_encode( irem_imm(src1, src2, dst, temp) ); + ins_pipe(sdiv_reg_imm); +%} + +// Register Long Remainder +instruct divL_reg_reg_1(iRegL dst, iRegL src1, iRegL src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "SDIVX $src1,$src2,$dst\t! long" %} + opcode(Assembler::sdivx_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(divL_reg_reg); +%} + +// Register Long Division +instruct divL_reg_imm13_1(iRegL dst, iRegL src1, immL13 src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "SDIVX $src1,$src2,$dst\t! long" %} + opcode(Assembler::sdivx_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(divL_reg_imm); +%} + +instruct mulL_reg_reg_1(iRegL dst, iRegL src1, iRegL src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "MULX $src1,$src2,$dst\t! 
long" %} + opcode(Assembler::mulx_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(mulL_reg_reg); +%} + +// Immediate Multiplication +instruct mulL_reg_imm13_1(iRegL dst, iRegL src1, immL13 src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "MULX $src1,$src2,$dst" %} + opcode(Assembler::mulx_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(mulL_reg_imm); +%} + +instruct subL_reg_reg_1(iRegL dst, iRegL src1, iRegL src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "SUB $src1,$src2,$dst\t! long" %} + opcode(Assembler::sub_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +instruct subL_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "SUB $src1,$src2,$dst\t! long" %} + opcode(Assembler::sub_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Register Long Remainder +instruct modL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (ModL src1 src2)); + ins_cost(DEFAULT_COST*(71 + 6 + 1)); + expand %{ + iRegL tmp1; + iRegL tmp2; + divL_reg_reg_1(tmp1, src1, src2); + mulL_reg_reg_1(tmp2, tmp1, src2); + subL_reg_reg_1(dst, src1, tmp2); + %} +%} + +// Register Long Remainder +instruct modL_reg_imm13(iRegL dst, iRegL src1, immL13 src2) %{ + match(Set dst (ModL src1 src2)); + ins_cost(DEFAULT_COST*(71 + 6 + 1)); + expand %{ + iRegL tmp1; + iRegL tmp2; + divL_reg_imm13_1(tmp1, src1, src2); + mulL_reg_imm13_1(tmp2, tmp1, src2); + subL_reg_reg_2 (dst, src1, tmp2); + %} +%} + +// Integer Shift Instructions +// Register Shift Left +instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (LShiftI src1 src2)); + + size(4); + format %{ "SLL $src1,$src2,$dst" %} + opcode(Assembler::sll_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( 
src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Left Immediate +instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (LShiftI src1 src2)); + + size(4); + format %{ "SLL $src1,$src2,$dst" %} + opcode(Assembler::sll_op3, Assembler::arith_op); + ins_encode( form3_rs1_imm5_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Left +instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (LShiftL src1 src2)); + + size(4); + format %{ "SLLX $src1,$src2,$dst" %} + opcode(Assembler::sllx_op3, Assembler::arith_op); + ins_encode( form3_sd_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Left Immediate +instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ + match(Set dst (LShiftL src1 src2)); + + size(4); + format %{ "SLLX $src1,$src2,$dst" %} + opcode(Assembler::sllx_op3, Assembler::arith_op); + ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Arithmetic Shift Right +instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (RShiftI src1 src2)); + size(4); + format %{ "SRA $src1,$src2,$dst" %} + opcode(Assembler::sra_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Register Arithmetic Shift Right Immediate +instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (RShiftI src1 src2)); + + size(4); + format %{ "SRA $src1,$src2,$dst" %} + opcode(Assembler::sra_op3, Assembler::arith_op); + ins_encode( form3_rs1_imm5_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Right Arithmetic Long +instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (RShiftL src1 src2)); + + size(4); + format %{ "SRAX $src1,$src2,$dst" %} + opcode(Assembler::srax_op3, Assembler::arith_op); + ins_encode( form3_sd_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); 
+%} + +// Register Shift Left Immediate +instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ + match(Set dst (RShiftL src1 src2)); + + size(4); + format %{ "SRAX $src1,$src2,$dst" %} + opcode(Assembler::srax_op3, Assembler::arith_op); + ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Right +instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (URShiftI src1 src2)); + + size(4); + format %{ "SRL $src1,$src2,$dst" %} + opcode(Assembler::srl_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Right Immediate +instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (URShiftI src1 src2)); + + size(4); + format %{ "SRL $src1,$src2,$dst" %} + opcode(Assembler::srl_op3, Assembler::arith_op); + ins_encode( form3_rs1_imm5_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Right +instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (URShiftL src1 src2)); + + size(4); + format %{ "SRLX $src1,$src2,$dst" %} + opcode(Assembler::srlx_op3, Assembler::arith_op); + ins_encode( form3_sd_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Right Immediate +instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ + match(Set dst (URShiftL src1 src2)); + + size(4); + format %{ "SRLX $src1,$src2,$dst" %} + opcode(Assembler::srlx_op3, Assembler::arith_op); + ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Right Immediate with a CastP2X +instruct shrP_reg_imm6(iRegL dst, iRegP src1, immU6 src2) %{ + match(Set dst (URShiftL (CastP2X src1) src2)); + size(4); + format %{ "SRLX $src1,$src2,$dst\t! 
Cast ptr $src1 to long and shift" %} + opcode(Assembler::srlx_op3, Assembler::arith_op); + ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + + +//----------Floating Point Arithmetic Instructions----------------------------- + +// Add float single precision +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + + size(4); + format %{ "FADDS $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fadds_opf); + ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst)); + ins_pipe(faddF_reg_reg); +%} + +// Add float double precision +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + + size(4); + format %{ "FADDD $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::faddd_opf); + ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst)); + ins_pipe(faddD_reg_reg); +%} + +// Sub float single precision +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + + size(4); + format %{ "FSUBS $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fsubs_opf); + ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst)); + ins_pipe(faddF_reg_reg); +%} + +// Sub float double precision +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + + size(4); + format %{ "FSUBD $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fsubd_opf); + ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst)); + ins_pipe(faddD_reg_reg); +%} + +// Mul float single precision +instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (MulF src1 src2)); + + size(4); + format %{ "FMULS $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmuls_opf); + ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst)); + ins_pipe(fmulF_reg_reg); +%} + +// Mul float double precision 
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (MulD src1 src2)); + + size(4); + format %{ "FMULD $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmuld_opf); + ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst)); + ins_pipe(fmulD_reg_reg); +%} + +// Div float single precision +instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + size(4); + format %{ "FDIVS $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fdivs_opf); + ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst)); + ins_pipe(fdivF_reg_reg); +%} + +// Div float double precision +instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + size(4); + format %{ "FDIVD $src1,$src2,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fdivd_opf); + ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst)); + ins_pipe(fdivD_reg_reg); +%} + +// Absolute float double precision +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + + format %{ "FABSd $src,$dst" %} + ins_encode(fabsd(dst, src)); + ins_pipe(faddD_reg); +%} + +// Absolute float single precision +instruct absF_reg(regF dst, regF src) %{ + match(Set dst (AbsF src)); + + format %{ "FABSs $src,$dst" %} + ins_encode(fabss(dst, src)); + ins_pipe(faddF_reg); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + + size(4); + format %{ "FNEGs $src,$dst" %} + opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fnegs_opf); + ins_encode(form3_opf_rs2F_rdF(src, dst)); + ins_pipe(faddF_reg); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + + format %{ "FNEGd $src,$dst" %} + ins_encode(fnegd(dst, src)); + ins_pipe(faddD_reg); +%} + +// Sqrt float single precision (matches a double sqrt of a widened float that is narrowed back to float) +instruct sqrtF_reg_reg(regF dst, regF src) %{ + 
match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + + size(4); + format %{ "FSQRTS $src,$dst" 
%} + ins_encode(fsqrts(dst, src)); + ins_pipe(fdivF_reg_reg); +%} + +// Sqrt float double precision +instruct sqrtD_reg_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + + size(4); + format %{ "FSQRTD $src,$dst" %} + ins_encode(fsqrtd(dst, src)); + ins_pipe(fdivD_reg_reg); +%} + +// Single/Double precision fused floating-point multiply-add (d = a * b + c). +instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{ + predicate(UseFMA); + match(Set dst (FmaF c (Binary a b))); + format %{ "fmadds $a,$b,$c,$dst\t# $dst = $a * $b + $c" %} + ins_encode(fmadds(dst, a, b, c)); + ins_pipe(fmaF_regx4); +%} + +instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{ + predicate(UseFMA); + match(Set dst (FmaD c (Binary a b))); + format %{ "fmaddd $a,$b,$c,$dst\t# $dst = $a * $b + $c" %} + ins_encode(fmaddd(dst, a, b, c)); + ins_pipe(fmaD_regx4); +%} + +// Additional patterns matching complement versions that we can map directly to +// variants of the fused multiply-add instructions. + +// Single/Double precision fused floating-point multiply-sub (d = a * b - c) +instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{ + predicate(UseFMA); + match(Set dst (FmaF (NegF c) (Binary a b))); + format %{ "fmsubs $a,$b,$c,$dst\t# $dst = $a * $b - $c" %} + ins_encode(fmsubs(dst, a, b, c)); + ins_pipe(fmaF_regx4); +%} + +instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{ + predicate(UseFMA); + match(Set dst (FmaD (NegD c) (Binary a b))); + format %{ "fmsubd $a,$b,$c,$dst\t# $dst = $a * $b - $c" %} + ins_encode(fmsubd(dst, a, b, c)); + ins_pipe(fmaD_regx4); +%} + +// Single/Double precision fused floating-point neg. 
multiply-add, +// d = -1 * a * b - c = -(a * b + c) +instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{ + predicate(UseFMA); + match(Set dst (FmaF (NegF c) (Binary (NegF a) b))); + match(Set dst (FmaF (NegF c) (Binary a (NegF b)))); + format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %} + ins_encode(fnmadds(dst, a, b, c)); + ins_pipe(fmaF_regx4); +%} + +instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{ + predicate(UseFMA); + match(Set dst (FmaD (NegD c) (Binary (NegD a) b))); + match(Set dst (FmaD (NegD c) (Binary a (NegD b)))); + format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %} + ins_encode(fnmaddd(dst, a, b, c)); + ins_pipe(fmaD_regx4); +%} + +// Single/Double precision fused floating-point neg. multiply-sub, +// d = -1 * a * b + c = -(a * b - c) +instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{ + predicate(UseFMA); + match(Set dst (FmaF c (Binary (NegF a) b))); + match(Set dst (FmaF c (Binary a (NegF b)))); + format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %} + ins_encode(fnmsubs(dst, a, b, c)); + ins_pipe(fmaF_regx4); +%} + +instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{ + predicate(UseFMA); + match(Set dst (FmaD c (Binary (NegD a) b))); + match(Set dst (FmaD c (Binary a (NegD b)))); + format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %} + ins_encode(fnmsubd(dst, a, b, c)); + ins_pipe(fmaD_regx4); +%} + +//----------Logical Instructions----------------------------------------------- +// And Instructions +// Register And +instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "AND $src1,$src2,$dst" %} + opcode(Assembler::and_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Immediate And +instruct andI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "AND $src1,$src2,$dst" %} + 
opcode(Assembler::and_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register And Long +instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (AndL src1 src2)); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "AND $src1,$src2,$dst\t! long" %} + opcode(Assembler::and_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +instruct andL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{ + match(Set dst (AndL src1 con)); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "AND $src1,$con,$dst\t! long" %} + opcode(Assembler::and_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, con, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Or Instructions +// Register Or +instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (OrI src1 src2)); + + size(4); + format %{ "OR $src1,$src2,$dst" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Immediate Or +instruct orI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{ + match(Set dst (OrI src1 src2)); + + size(4); + format %{ "OR $src1,$src2,$dst" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Or Long +instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (OrL src1 src2)); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "OR $src1,$src2,$dst\t! long" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +instruct orL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{ + match(Set dst (OrL src1 con)); + ins_cost(DEFAULT_COST*2); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "OR $src1,$con,$dst\t! 
long" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, con, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +instruct orL_reg_castP2X(iRegL dst, iRegL src1, sp_ptr_RegP src2) %{ + match(Set dst (OrL src1 (CastP2X src2))); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "OR $src1,$src2,$dst\t! long" %} + opcode(Assembler::or_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Xor Instructions +// Register Xor +instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (XorI src1 src2)); + + size(4); + format %{ "XOR $src1,$src2,$dst" %} + opcode(Assembler::xor_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +// Immediate Xor +instruct xorI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{ + match(Set dst (XorI src1 src2)); + + size(4); + format %{ "XOR $src1,$src2,$dst" %} + opcode(Assembler::xor_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +// Register Xor Long +instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (XorL src1 src2)); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "XOR $src1,$src2,$dst\t! long" %} + opcode(Assembler::xor_op3, Assembler::arith_op); + ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) ); + ins_pipe(ialu_reg_reg); +%} + +instruct xorL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{ + match(Set dst (XorL src1 con)); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "XOR $src1,$con,$dst\t! long" %} + opcode(Assembler::xor_op3, Assembler::arith_op); + ins_encode( form3_rs1_simm13_rd( src1, con, dst ) ); + ins_pipe(ialu_reg_imm); +%} + +//----------Convert to Boolean------------------------------------------------- +// Nice hack for 32-bit tests but doesn't work for +// 64-bit pointers. 
+instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{ + match(Set dst (Conv2B src)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*2); + format %{ "CMP R_G0,$src\n\t" + "ADDX R_G0,0,$dst" %} + ins_encode( enc_to_bool( src, dst ) ); + ins_pipe(ialu_reg_ialu); +%} + +instruct convP2B( iRegI dst, iRegP src ) %{ + match(Set dst (Conv2B src)); + ins_cost(DEFAULT_COST*2); + format %{ "MOV $src,$dst\n\t" + "MOVRNZ $src,1,$dst" %} + ins_encode( form3_g0_rs2_rd_move( src, dst ), enc_convP2B( dst, src ) ); + ins_pipe(ialu_clr_and_mover); +%} + +instruct cmpLTMask0( iRegI dst, iRegI src, immI0 zero, flagsReg ccr ) %{ + match(Set dst (CmpLTMask src zero)); + effect(KILL ccr); + size(4); + format %{ "SRA $src,#31,$dst\t# cmpLTMask0" %} + ins_encode %{ + __ sra($src$$Register, 31, $dst$$Register); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{ + match(Set dst (CmpLTMask p q)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*4); + format %{ "CMP $p,$q\n\t" + "MOV #0,$dst\n\t" + "BLT,a .+8\n\t" + "MOV #-1,$dst" %} + ins_encode( enc_ltmask(p,q,dst) ); + ins_pipe(ialu_reg_reg_ialu); +%} + +instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, iRegI tmp, flagsReg ccr ) %{ + match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); + effect(KILL ccr, TEMP tmp); + ins_cost(DEFAULT_COST*3); + + format %{ "SUBcc $p,$q,$p\t! p' = p-q\n\t" + "ADD $p,$y,$tmp\t! g3=p-q+y\n\t" + "MOVlt $tmp,$p\t! p' < 0 ? 
p'+y : p'" %} + ins_encode(enc_cadd_cmpLTMask(p, q, y, tmp)); + ins_pipe(cadd_cmpltmask); +%} + +instruct and_cmpLTMask(iRegI p, iRegI q, iRegI y, flagsReg ccr) %{ + match(Set p (AndI (CmpLTMask p q) y)); + effect(KILL ccr); + ins_cost(DEFAULT_COST*3); + + format %{ "CMP $p,$q\n\t" + "MOV $y,$p\n\t" + "MOVge G0,$p" %} + ins_encode %{ + __ cmp($p$$Register, $q$$Register); + __ mov($y$$Register, $p$$Register); + __ movcc(Assembler::greaterEqual, false, Assembler::icc, G0, $p$$Register); + %} + ins_pipe(ialu_reg_reg_ialu); +%} + +//----------------------------------------------------------------- +// Direct raw moves between float and general registers using VIS3. + +// ins_pipe(faddF_reg); -- NOTE(review): the four rules below use ins_pipe(ialu_reg_reg); this looks like a leftover alternative, confirm before removing. +instruct MoveF2I_reg_reg(iRegI dst, regF src) %{ + predicate(UseVIS >= 3); + match(Set dst (MoveF2I src)); + + format %{ "MOVSTOUW $src,$dst\t! MoveF2I" %} + ins_encode %{ + __ movstouw($src$$FloatRegister, $dst$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct MoveI2F_reg_reg(regF dst, iRegI src) %{ + predicate(UseVIS >= 3); + match(Set dst (MoveI2F src)); + + format %{ "MOVWTOS $src,$dst\t! MoveI2F" %} + ins_encode %{ + __ movwtos($src$$Register, $dst$$FloatRegister); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct MoveD2L_reg_reg(iRegL dst, regD src) %{ + predicate(UseVIS >= 3); + match(Set dst (MoveD2L src)); + + format %{ "MOVDTOX $src,$dst\t! MoveD2L" %} + ins_encode %{ + __ movdtox(as_DoubleFloatRegister($src$$reg), $dst$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct MoveL2D_reg_reg(regD dst, iRegL src) %{ + predicate(UseVIS >= 3); + match(Set dst (MoveL2D src)); + + format %{ "MOVXTOD $src,$dst\t! MoveL2D" %} + ins_encode %{ + __ movxtod($src$$Register, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(ialu_reg_reg); +%} + + +// Raw moves between float and general registers using stack. 
+
+instruct MoveF2I_stack_reg(iRegI dst, stackSlotF src) %{
+  match(Set dst (MoveF2I src));  // float stack slot -> int register (LDUW)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LDUW $src,$dst\t! MoveF2I" %}
+  opcode(Assembler::lduw_op3);
+  ins_encode(simple_form3_mem_reg( src, dst ) );
+  ins_pipe(iload_mem);
+%}
+
+instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+  match(Set dst (MoveI2F src));  // int stack slot -> float register (LDF)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LDF $src,$dst\t! MoveI2F" %}
+  opcode(Assembler::ldf_op3);
+  ins_encode(simple_form3_mem_reg(src, dst));
+  ins_pipe(floadF_stk);
+%}
+
+instruct MoveD2L_stack_reg(iRegL dst, stackSlotD src) %{
+  match(Set dst (MoveD2L src));  // double stack slot -> long register (LDX)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LDX $src,$dst\t! MoveD2L" %}
+  opcode(Assembler::ldx_op3);
+  ins_encode(simple_form3_mem_reg( src, dst ) );
+  ins_pipe(iload_mem);
+%}
+
+instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+  match(Set dst (MoveL2D src));  // long stack slot -> double register (LDDF)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LDDF $src,$dst\t! MoveL2D" %}
+  opcode(Assembler::lddf_op3);
+  ins_encode(simple_form3_mem_reg(src, dst));
+  ins_pipe(floadD_stk);
+%}
+
+instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
+  match(Set dst (MoveF2I src));  // float register -> int stack slot (STF)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STF $src,$dst\t! MoveF2I" %}
+  opcode(Assembler::stf_op3);
+  ins_encode(simple_form3_mem_reg(dst, src));
+  ins_pipe(fstoreF_stk_reg);
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+  match(Set dst (MoveI2F src));  // int register -> float stack slot (STW)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW $src,$dst\t! MoveI2F" %}
+  opcode(Assembler::stw_op3);
+  ins_encode(simple_form3_mem_reg( dst, src ) );
+  ins_pipe(istore_mem_reg);
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+  match(Set dst (MoveD2L src));  // double register -> long stack slot (STDF)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STDF $src,$dst\t! MoveD2L" %}
+  opcode(Assembler::stdf_op3);
+  ins_encode(simple_form3_mem_reg(dst, src));
+  ins_pipe(fstoreD_stk_reg);
+%}
+
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+  match(Set dst (MoveL2D src));  // long register -> double stack slot (STX)
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STX $src,$dst\t! MoveL2D" %}
+  opcode(Assembler::stx_op3);
+  ins_encode(simple_form3_mem_reg( dst, src ) );
+  ins_pipe(istore_mem_reg);
+%}
+
+
+//----------Arithmetic Conversion Instructions---------------------------------
+// The conversion operations are sorted alphabetically. Please keep it that way!
+
+instruct convD2F_reg(regF dst, regD src) %{
+  match(Set dst (ConvD2F src));  // double -> float with a single FDTOS
+  size(4);
+  format %{ "FDTOS $src,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fdtos_opf);
+  ins_encode(form3_opf_rs2D_rdF(src, dst));
+  ins_pipe(fcvtD2F);
+%}
+
+
+// Convert a double to an int in a float register.
+// If the double is a NAN, stuff a zero in instead.
+instruct convD2I_helper(regF dst, regD src, flagsRegF0 fcc0) %{
+  effect(DEF dst, USE src, KILL fcc0);  // no match rule: instantiated from the expand blocks of convD2I_stk / convD2I_reg
+  format %{ "FCMPd fcc0,$src,$src\t! check for NAN\n\t"
+            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
+            "FDTOI $src,$dst\t! convert in delay slot\n\t"
+            "FITOS $dst,$dst\t! change NaN/max-int to valid float\n\t"
+            "FSUBs $dst,$dst,$dst\t! cleared only if nan\n"
+            "skip:" %}
+  ins_encode(form_d2i_helper(src,dst));
+  ins_pipe(fcvtD2I);
+%}
+
+instruct convD2I_stk(stackSlotI dst, regD src) %{
+  match(Set dst (ConvD2I src));  // convert in a float temp, then store it to the int stack slot
+  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
+  expand %{
+    regF tmp;
+    convD2I_helper(tmp, src);
+    regF_to_stkI(dst, tmp);
+  %}
+%}
+
+instruct convD2I_reg(iRegI dst, regD src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvD2I src));  // VIS3 form: convert in a float temp, then MoveF2I_reg_reg into dst
+  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
+  expand %{
+    regF tmp;
+    convD2I_helper(tmp, src);
+    MoveF2I_reg_reg(dst, tmp);
+  %}
+%}
+
+
+// Convert a double to a long in a double register.
+// If the double is a NAN, stuff a zero in instead.
+instruct convD2L_helper(regD dst, regD src, flagsRegF0 fcc0) %{
+  effect(DEF dst, USE src, KILL fcc0);  // no match rule: instantiated from the expand blocks of convD2L_stk / convD2L_reg
+  format %{ "FCMPd fcc0,$src,$src\t! check for NAN\n\t"
+            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
+            "FDTOX $src,$dst\t! convert in delay slot\n\t"
+            "FXTOD $dst,$dst\t! change NaN/max-long to valid double\n\t"
+            "FSUBd $dst,$dst,$dst\t! cleared only if nan\n"
+            "skip:" %}
+  ins_encode(form_d2l_helper(src,dst));
+  ins_pipe(fcvtD2L);
+%}
+
+instruct convD2L_stk(stackSlotL dst, regD src) %{
+  match(Set dst (ConvD2L src));  // convert in a double temp, then store it to the long stack slot
+  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
+  expand %{
+    regD tmp;
+    convD2L_helper(tmp, src);
+    regD_to_stkL(dst, tmp);
+  %}
+%}
+
+instruct convD2L_reg(iRegL dst, regD src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvD2L src));  // VIS3 form: convert in a double temp, then MoveD2L_reg_reg into dst
+  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
+  expand %{
+    regD tmp;
+    convD2L_helper(tmp, src);
+    MoveD2L_reg_reg(dst, tmp);
+  %}
+%}
+
+
+instruct convF2D_reg(regD dst, regF src) %{
+  match(Set dst (ConvF2D src));  // float -> double with a single FSTOD
+  format %{ "FSTOD $src,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fstod_opf);
+  ins_encode(form3_opf_rs2F_rdD(src, dst));
+  ins_pipe(fcvtF2D);
+%}
+
+
+// Convert a float to an int in a float register.
+// If the float is a NAN, stuff a zero in instead.
+instruct convF2I_helper(regF dst, regF src, flagsRegF0 fcc0) %{
+  effect(DEF dst, USE src, KILL fcc0);  // no match rule: instantiated from the expand blocks of convF2I_stk / convF2I_reg
+  format %{ "FCMPs fcc0,$src,$src\t! check for NAN\n\t"
+            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
+            "FSTOI $src,$dst\t! convert in delay slot\n\t"
+            "FITOS $dst,$dst\t! change NaN/max-int to valid float\n\t"
+            "FSUBs $dst,$dst,$dst\t! cleared only if nan\n"
+            "skip:" %}
+  ins_encode(form_f2i_helper(src,dst));
+  ins_pipe(fcvtF2I);
+%}
+
+instruct convF2I_stk(stackSlotI dst, regF src) %{
+  match(Set dst (ConvF2I src));  // convert in a float temp, then store it to the int stack slot
+  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
+  expand %{
+    regF tmp;
+    convF2I_helper(tmp, src);
+    regF_to_stkI(dst, tmp);
+  %}
+%}
+
+instruct convF2I_reg(iRegI dst, regF src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvF2I src));  // VIS3 form: convert in a float temp, then MoveF2I_reg_reg into dst
+  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
+  expand %{
+    regF tmp;
+    convF2I_helper(tmp, src);
+    MoveF2I_reg_reg(dst, tmp);
+  %}
+%}
+
+
+// Convert a float to a long in a double register (dst is regD).
+// If the float is a NAN, stuff a zero in instead.
+instruct convF2L_helper(regD dst, regF src, flagsRegF0 fcc0) %{
+  effect(DEF dst, USE src, KILL fcc0);  // no match rule: instantiated from the expand blocks of convF2L_stk / convF2L_reg
+  format %{ "FCMPs fcc0,$src,$src\t! check for NAN\n\t"
+            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
+            "FSTOX $src,$dst\t! convert in delay slot\n\t"
+            "FXTOD $dst,$dst\t! change NaN/max-long to valid double\n\t"
+            "FSUBd $dst,$dst,$dst\t! cleared only if nan\n"
+            "skip:" %}
+  ins_encode(form_f2l_helper(src,dst));
+  ins_pipe(fcvtF2L);
+%}
+
+instruct convF2L_stk(stackSlotL dst, regF src) %{
+  match(Set dst (ConvF2L src));  // convert in a double temp, then store it to the long stack slot
+  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
+  expand %{
+    regD tmp;
+    convF2L_helper(tmp, src);
+    regD_to_stkL(dst, tmp);
+  %}
+%}
+
+instruct convF2L_reg(iRegL dst, regF src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvF2L src));  // VIS3 form: convert in a double temp, then MoveD2L_reg_reg into dst
+  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
+  expand %{
+    regD tmp;
+    convF2L_helper(tmp, src);
+    MoveD2L_reg_reg(dst, tmp);
+  %}
+%}
+
+
+instruct convI2D_helper(regD dst, regF tmp) %{
+  effect(USE tmp, DEF dst);  // no match rule: FITOD step shared by convI2D_stk / convI2D_reg
+  format %{ "FITOD $tmp,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fitod_opf);
+  ins_encode(form3_opf_rs2F_rdD(tmp, dst));
+  ins_pipe(fcvtI2D);
+%}
+
+instruct convI2D_stk(stackSlotI src, regD dst) %{
+  match(Set dst (ConvI2D src));  // load the int stack slot into a float temp, then FITOD
+  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
+  expand %{
+    regF tmp;
+    stkI_to_regF(tmp, src);
+    convI2D_helper(dst, tmp);
+  %}
+%}
+
+instruct convI2D_reg(regD_low dst, iRegI src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvI2D src));  // VIS3 form: MoveI2F_reg_reg into a float temp, then FITOD
+  expand %{
+    regF tmp;
+    MoveI2F_reg_reg(tmp, src);
+    convI2D_helper(dst, tmp);
+  %}
+%}
+
+instruct convI2D_mem(regD_low dst, memory mem) %{
+  match(Set dst (ConvI2D (LoadI mem)));  // folds the LoadI: LDF into dst, then FITOD on dst in place
+  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
+  format %{ "LDF $mem,$dst\n\t"
+            "FITOD $dst,$dst" %}
+  opcode(Assembler::ldf_op3, Assembler::fitod_opf);
+  ins_encode(simple_form3_mem_reg( mem, dst ), form3_convI2F(dst, dst));
+  ins_pipe(floadF_mem);
+%}
+
+
+instruct convI2F_helper(regF dst, regF tmp) %{
+  effect(DEF dst, USE tmp);  // no match rule: FITOS step shared by convI2F_stk / convI2F_reg
+  format %{ "FITOS $tmp,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fitos_opf);
+  ins_encode(form3_opf_rs2F_rdF(tmp, dst));
+  ins_pipe(fcvtI2F);
+%}
+
+instruct convI2F_stk(regF dst, stackSlotI src) %{
+  match(Set dst (ConvI2F src));  // load the int stack slot into a float temp, then FITOS
+  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
+  expand %{
+    regF tmp;
+    stkI_to_regF(tmp,src);
+    convI2F_helper(dst, tmp);
+  %}
+%}
+
+instruct convI2F_reg(regF dst, iRegI src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvI2F src));  // VIS3 form: MoveI2F_reg_reg into a float temp, then FITOS
+  ins_cost(DEFAULT_COST);
+  expand %{
+    regF tmp;
+    MoveI2F_reg_reg(tmp, src);
+    convI2F_helper(dst, tmp);
+  %}
+%}
+
+instruct convI2F_mem( regF dst, memory mem ) %{
+  match(Set dst (ConvI2F (LoadI mem)));  // folds the LoadI: LDF into dst, then FITOS on dst in place
+  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
+  format %{ "LDF $mem,$dst\n\t"
+            "FITOS $dst,$dst" %}
+  opcode(Assembler::ldf_op3, Assembler::fitos_opf);
+  ins_encode(simple_form3_mem_reg( mem, dst ), form3_convI2F(dst, dst));
+  ins_pipe(floadF_mem);
+%}
+
+
+instruct convI2L_reg(iRegL dst, iRegI src) %{
+  match(Set dst (ConvI2L src));  // SRA with R_G0 as the shift-count register (printed as "SRA $src,0,$dst")
+  size(4);
+  format %{ "SRA $src,0,$dst\t! int->long" %}
+  opcode(Assembler::sra_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src, R_G0, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Zero-extend convert int to long
+instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{
+  match(Set dst (AndL (ConvI2L src) mask) );  // the ConvI2L + AndL pair collapses into one SRL
+  size(4);
+  format %{ "SRL $src,0,$dst\t! zero-extend int to long" %}
+  opcode(Assembler::srl_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src, R_G0, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Zero-extend long
+instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{
+  match(Set dst (AndL src mask) );  // AndL with the immL_32bits mask becomes one SRL
+  size(4);
+  format %{ "SRL $src,0,$dst\t! zero-extend long" %}
+  opcode(Assembler::srl_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src, R_G0, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+
+//-----------
+// Long to Double conversion using V8 opcodes.
+// Still useful because cheetah traps and becomes
+// amazingly slow for some common numbers.
+
+// Magic constant, 0x43300000
+instruct loadConI_x43300000(iRegI dst) %{
+  effect(DEF dst);  // effect-only rule (no match); presumably used from an expand rule outside this chunk
+  size(4);
+  format %{ "SETHI HI(0x43300000),$dst\t! 2^52" %}
+  ins_encode(SetHi22(0x43300000, dst));
+  ins_pipe(ialu_none);
+%}
+
+// Magic constant, 0x41f00000
+instruct loadConI_x41f00000(iRegI dst) %{
+  effect(DEF dst);  // effect-only rule (no match); presumably used from an expand rule outside this chunk
+  size(4);
+  format %{ "SETHI HI(0x41f00000),$dst\t! 2^32" %}
+  ins_encode(SetHi22(0x41f00000, dst));
+  ins_pipe(ialu_none);
+%}
+
+// Construct a double from two float halves
+instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{
+  effect(DEF dst, USE src1, USE src2);  // two FMOVS: src1.hi -> dst.hi and src2.lo -> dst.lo, hence size(8)
+  size(8);
+  format %{ "FMOVS $src1.hi,$dst.hi\n\t"
+            "FMOVS $src2.lo,$dst.lo" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmovs_opf);
+  ins_encode(form3_opf_rs2D_hi_rdD_hi(src1, dst), form3_opf_rs2D_lo_rdD_lo(src2, dst));
+  ins_pipe(faddD_reg_reg);
+%}
+
+// Convert integer in high half of a double register (in the lower half of
+// the double register file) to double
+instruct convI2D_regDHi_regD(regD dst, regD_low src) %{
+  effect(DEF dst, USE src);
+  size(4);
+  format %{ "FITOD $src,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fitod_opf);
+  ins_encode(form3_opf_rs2D_rdD(src, dst));
+  ins_pipe(fcvtLHi2D);
+%}
+
+// Add float double precision
+instruct addD_regD_regD(regD dst, regD src1, regD src2) %{
+  effect(DEF dst, USE src1, USE src2);  // effect-only building block (no match rule)
+  size(4);
+  format %{ "FADDD $src1,$src2,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::faddd_opf);
+  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
+  ins_pipe(faddD_reg_reg);
+%}
+
+// Sub float double precision
+instruct subD_regD_regD(regD dst, regD src1, regD src2) %{
+  effect(DEF dst, USE src1, USE src2);  // effect-only building block (no match rule)
+  size(4);
+  format %{ "FSUBD $src1,$src2,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fsubd_opf);
+  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
+  ins_pipe(faddD_reg_reg);
+%}
+
+// Mul float double precision
+instruct mulD_regD_regD(regD dst, regD src1, regD src2) %{
+  effect(DEF dst, USE src1, USE src2);  // effect-only building block (no match rule)
+  size(4);
+  format %{ "FMULD $src1,$src2,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmuld_opf);
+  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
+  ins_pipe(fmulD_reg_reg);
+%}
+
+// Long to Double conversion using fast fxtof (the instruction emitted is FXTOD)
+instruct convL2D_helper(regD dst, regD tmp) %{
+  effect(DEF dst, USE tmp);  // no match rule: FXTOD step shared by convL2D_stk_fast_fxtof / convL2D_reg
+  size(4);
+  format %{ "FXTOD $tmp,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fxtod_opf);
+  ins_encode(form3_opf_rs2D_rdD(tmp, dst));
+  ins_pipe(fcvtL2D);
+%}
+
+instruct convL2D_stk_fast_fxtof(regD dst, stackSlotL src) %{
+  match(Set dst (ConvL2D src));  // load the long stack slot into a double temp, then FXTOD
+  ins_cost(DEFAULT_COST + 3 * MEMORY_REF_COST);
+  expand %{
+    regD tmp;
+    stkL_to_regD(tmp, src);
+    convL2D_helper(dst, tmp);
+  %}
+%}
+
+instruct convL2D_reg(regD dst, iRegL src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvL2D src));  // VIS3 form: MoveL2D_reg_reg into a double temp, then FXTOD
+  expand %{
+    regD tmp;
+    MoveL2D_reg_reg(tmp, src);
+    convL2D_helper(dst, tmp);
+  %}
+%}
+
+// Long to Float conversion using fast fxtof (the instruction emitted is FXTOS)
+instruct convL2F_helper(regF dst, regD tmp) %{
+  effect(DEF dst, USE tmp);  // no match rule: FXTOS step shared by convL2F_stk_fast_fxtof / convL2F_reg
+  size(4);
+  format %{ "FXTOS $tmp,$dst" %}
+  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fxtos_opf);
+  ins_encode(form3_opf_rs2D_rdF(tmp, dst));
+  ins_pipe(fcvtL2F);
+%}
+
+instruct convL2F_stk_fast_fxtof(regF dst, stackSlotL src) %{
+  match(Set dst (ConvL2F src));  // load the long stack slot into a double temp, then FXTOS
+  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
+  expand %{
+    regD tmp;
+    stkL_to_regD(tmp, src);
+    convL2F_helper(dst, tmp);
+  %}
+%}
+
+instruct convL2F_reg(regF dst, iRegL src) %{
+  predicate(UseVIS >= 3);
+  match(Set dst (ConvL2F src));  // VIS3 form: MoveL2D_reg_reg into a double temp, then FXTOS
+  ins_cost(DEFAULT_COST);
+  expand %{
+    regD tmp;
+    MoveL2D_reg_reg(tmp, src);
+    convL2F_helper(dst, tmp);
+  %}
+%}
+
+//-----------
+
+instruct convL2I_reg(iRegI dst, iRegL src) %{
+  match(Set dst (ConvL2I src));  // one instruction, printed as SRA with R_G0 as the count
+  size(4);
+  format %{ "SRA $src,R_G0,$dst\t! long->int" %}
+  ins_encode( form3_rs1_rd_signextend_lo1( src, dst ) );
+  ins_pipe(ialu_reg);
+%}
+
+// Register Shift Right Immediate
+instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{
+  match(Set dst (ConvL2I (RShiftL src cnt)));  // shift (count operand is immI_32_63) and narrowing done by one SRAX
+
+  size(4);
+  format %{ "SRAX $src,$cnt,$dst" %}
+  opcode(Assembler::srax_op3, Assembler::arith_op);
+  ins_encode( form3_sd_rs1_imm6_rd( src, cnt, dst ) );
+  ins_pipe(ialu_reg_imm);
+%}
+
+//----------Control Flow Instructions------------------------------------------
+// Compare Instructions
+// Compare Integers
+instruct compI_iReg(flagsReg icc, iRegI op1, iRegI op2) %{
+  match(Set icc (CmpI op1 op2));  // SUBcc op1,op2 with R_G0 as destination (printed as CMP)
+  effect( DEF icc, USE op1, USE op2 );
+
+  size(4);
+  format %{ "CMP $op1,$op2" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compU_iReg(flagsRegU icc, iRegI op1, iRegI op2) %{
+  match(Set icc (CmpU op1 op2));  // same SUBcc encoding as compI_iReg; result typed as flagsRegU
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! unsigned" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compUL_iReg(flagsRegUL xcc, iRegL op1, iRegL op2) %{
+  match(Set xcc (CmpUL op1 op2));  // same SUBcc encoding on long operands; result typed as flagsRegUL
+  effect(DEF xcc, USE op1, USE op2);
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! unsigned long" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode(form3_rs1_rs2_rd(op1, op2, R_G0));
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compI_iReg_imm13(flagsReg icc, iRegI op1, immI13 op2) %{
+  match(Set icc (CmpI op1 op2));  // SUBcc against an immI13 constant, destination R_G0
+  effect( DEF icc, USE op1 );
+
+  size(4);
+  format %{ "CMP $op1,$op2" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+instruct testI_reg_reg( flagsReg icc, iRegI op1, iRegI op2, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 op2) zero));  // (op1 & op2) compared with zero becomes one ANDcc (printed as BTST)
+
+  size(4);
+  format %{ "BTST $op2,$op1" %}
+  opcode(Assembler::andcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testI_reg_imm( flagsReg icc, iRegI op1, immI13 op2, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 op2) zero));  // immediate form of testI_reg_reg
+
+  size(4);
+  format %{ "BTST $op2,$op1" %}
+  opcode(Assembler::andcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_imm_zero);
+%}
+
+instruct compL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2 ) %{
+  match(Set xcc (CmpL op1 op2));  // SUBcc on long operands, destination R_G0
+  effect( DEF xcc, USE op1, USE op2 );
+
+  size(4);
+  format %{ "CMP $op1,$op2\t\t! long" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compL_reg_con(flagsRegL xcc, iRegL op1, immL13 con) %{
+  match(Set xcc (CmpL op1 con));  // SUBcc against an immL13 constant
+  effect( DEF xcc, USE op1, USE con );
+
+  size(4);
+  format %{ "CMP $op1,$con\t\t! long" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, con, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);  // NOTE(review): reg_reg pipe class on an immediate form; compI_iReg_imm13 uses ialu_cconly_reg_imm - confirm
+%}
+
+instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{
+  match(Set xcc (CmpL (AndL op1 op2) zero));  // (op1 & op2) compared with zero becomes one ANDcc (printed as BTST)
+  effect( DEF xcc, USE op1, USE op2 );
+
+  size(4);
+  format %{ "BTST $op1,$op2\t\t! long" %}
+  opcode(Assembler::andcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// useful for checking the alignment of a pointer:
+instruct testL_reg_con(flagsRegL xcc, iRegL op1, immL13 con, immL0 zero) %{
+  match(Set xcc (CmpL (AndL op1 con) zero));  // immediate form of testL_reg_reg
+  effect( DEF xcc, USE op1, USE con );
+
+  size(4);
+  format %{ "BTST $op1,$con\t\t! long" %}
+  opcode(Assembler::andcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, con, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);  // NOTE(review): reg_reg pipe class on an immediate form - confirm
+%}
+
+instruct compU_iReg_imm13(flagsRegU icc, iRegI op1, immU12 op2 ) %{
+  match(Set icc (CmpU op1 op2));  // note: despite the _imm13 name the constant operand is immU12
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! unsigned" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+instruct compUL_iReg_imm13(flagsRegUL xcc, iRegL op1, immUL12 op2) %{
+  match(Set xcc (CmpUL op1 op2));  // note: despite the _imm13 name the constant operand is immUL12
+  effect(DEF xcc, USE op1, USE op2);
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! unsigned long" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode(form3_rs1_simm13_rd(op1, op2, R_G0));
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+// Compare Pointers
+instruct compP_iRegP(flagsRegP pcc, iRegP op1, iRegP op2 ) %{
+  match(Set pcc (CmpP op1 op2));  // SUBcc on pointer registers, destination R_G0
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! ptr" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compP_iRegP_imm13(flagsRegP pcc, iRegP op1, immP13 op2 ) %{
+  match(Set pcc (CmpP op1 op2));  // SUBcc against an immP13 constant
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! ptr" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+// Compare Narrow oops
+instruct compN_iRegN(flagsReg icc, iRegN op1, iRegN op2 ) %{
+  match(Set icc (CmpN op1 op2));  // SUBcc on narrow-oop registers; result is a plain flagsReg
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! compressed ptr" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compN_iRegN_immN0(flagsReg icc, iRegN op1, immN0 op2 ) %{
+  match(Set icc (CmpN op1 op2));  // SUBcc against the immN0 constant
+
+  size(4);
+  format %{ "CMP $op1,$op2\t! compressed ptr" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+//----------Max and Min--------------------------------------------------------
+// Min Instructions
+// Conditional move for min
+instruct cmovI_reg_lt( iRegI op2, iRegI op1, flagsReg icc ) %{
+  effect( USE_DEF op2, USE op1, USE icc );  // no match rule: second step of the minI_eReg expansion
+
+  size(4);
+  format %{ "MOVlt icc,$op1,$op2\t! min" %}
+  opcode(Assembler::less);
+  ins_encode( enc_cmov_reg_minmax(op2,op1) );
+  ins_pipe(ialu_reg_flags);
+%}
+
+// Min Register with Register.
+instruct minI_eReg(iRegI op1, iRegI op2) %{
+  match(Set op2 (MinI op1 op2));  // expands to compI_iReg followed by cmovI_reg_lt; op2 is both input and result
+  ins_cost(DEFAULT_COST*2);
+  expand %{
+    flagsReg icc;
+    compI_iReg(icc,op1,op2);
+    cmovI_reg_lt(op2,op1,icc);
+  %}
+%}
+
+// Max Instructions
+// Conditional move for max
+instruct cmovI_reg_gt( iRegI op2, iRegI op1, flagsReg icc ) %{
+  effect( USE_DEF op2, USE op1, USE icc );  // no match rule: second step of maxI_eReg. NOTE(review): no size(4) here, unlike cmovI_reg_lt - confirm
+  format %{ "MOVgt icc,$op1,$op2\t! max" %}
+  opcode(Assembler::greater);
+  ins_encode( enc_cmov_reg_minmax(op2,op1) );
+  ins_pipe(ialu_reg_flags);
+%}
+
+// Max Register with Register
+instruct maxI_eReg(iRegI op1, iRegI op2) %{
+  match(Set op2 (MaxI op1 op2));  // expands to compI_iReg followed by cmovI_reg_gt; op2 is both input and result
+  ins_cost(DEFAULT_COST*2);
+  expand %{
+    flagsReg icc;
+    compI_iReg(icc,op1,op2);
+    cmovI_reg_gt(op2,op1,icc);
+  %}
+%}
+
+
+//----------Float Compares----------------------------------------------------
+// Compare floating, generate condition code
+instruct cmpF_cc(flagsRegF fcc, regF src1, regF src2) %{
+  match(Set fcc (CmpF src1 src2));  // single FCMPs writing the fcc operand
+
+  size(4);
+  format %{ "FCMPs $fcc,$src1,$src2" %}
+  opcode(Assembler::fpop2_op3, Assembler::arith_op, Assembler::fcmps_opf);
+  ins_encode( form3_opf_rs1F_rs2F_fcc( src1, src2, fcc ) );
+  ins_pipe(faddF_fcc_reg_reg_zero);
+%}
+
+instruct cmpD_cc(flagsRegF fcc, regD src1, regD src2) %{
+  match(Set fcc (CmpD src1 src2));  // single FCMPd writing the fcc operand
+
+  size(4);
+  format %{ "FCMPd $fcc,$src1,$src2" %}
+  opcode(Assembler::fpop2_op3, Assembler::arith_op, Assembler::fcmpd_opf);
+  ins_encode( form3_opf_rs1D_rs2D_fcc( src1, src2, fcc ) );
+  ins_pipe(faddD_fcc_reg_reg_zero);
+%}
+
+
+// Compare floating, generate -1,0,1
+instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF0 fcc0) %{
+  match(Set dst (CmpF3 src1 src2));  // three-way float compare into an int register; clobbers fcc0
+  effect(KILL fcc0);
+  ins_cost(DEFAULT_COST*3+BRANCH_COST*3);
+  format %{ "fcmpl $dst,$src1,$src2" %}
+  // Primary = float
+  opcode( true );
+  ins_encode( floating_cmp( dst, src1, src2 ) );
+  ins_pipe( floating_cmp );
+%}
+
+instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsRegF0 fcc0) %{
+  match(Set dst (CmpD3 src1 src2));  // three-way double compare into an int register; clobbers fcc0
+  effect(KILL fcc0);
+  ins_cost(DEFAULT_COST*3+BRANCH_COST*3);
+  format %{ "dcmpl $dst,$src1,$src2" %}
+  // Primary = double (not float)
+  opcode( false );
+  ins_encode( floating_cmp( dst, src1, src2 ) );
+  ins_pipe( floating_cmp );
+%}
+
+//----------Branches---------------------------------------------------------
+// Jump
+// (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
+instruct jumpXtnd(iRegX switch_val, o7RegI table) %{
+  match(Jump switch_val);  // indirect jump through an entry loaded from the constant table
+  effect(TEMP table);
+
+  ins_cost(350);
+
+  format %{ "ADD $constanttablebase, $constantoffset, O7\n\t"
+            "LD [O7 + $switch_val], O7\n\t"
+            "JUMP O7" %}
+  ins_encode %{
+    // Calculate table address into a register.
+    Register table_reg;
+    Register label_reg = O7;
+    // If we are calculating the size of this instruction don't trust
+    // zero offsets because they might change when
+    // MachConstantBaseNode decides to optimize the constant table
+    // base.
+    if ((constant_offset() == 0) && !Compile::current()->output()->in_scratch_emit_size()) {
+      table_reg = $constanttablebase;
+    } else {
+      table_reg = O7;
+      RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset, O7);
+      __ add($constanttablebase, con_offset, table_reg);
+    }
+
+    // Jump to base address + switch value
+    __ ld_ptr(table_reg, $switch_val$$Register, label_reg);
+    __ jmp(label_reg, G0);
+    __ delayed()->nop();
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Direct Branch. Use V8 version with longer range.
+instruct branch(label labl) %{
+  match(Goto);  // BA plus a NOP in the delay slot, hence size(8)
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BA $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ ba(*L);
+    __ delayed()->nop();
+  %}
+  ins_avoid_back_to_back(AVOID_BEFORE);
+  ins_pipe(br);
+%}
+
+// Direct Branch, short with no delay slot
+instruct branch_short(label labl) %{
+  match(Goto);  // short replacement for 'branch', only selectable when UseCBCond is set
+  predicate(UseCBCond);
+  effect(USE labl);
+
+  size(4); // Assuming no NOP inserted.
+  ins_cost(BRANCH_COST);
+  format %{ "BA $labl\t! short branch" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ ba_short(*L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+  ins_pipe(cbcond_reg_imm);
+%}
+
+// Conditional Direct Branch
+instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
+  match(If cmp icc);  // branch on integer condition codes produced by a separate compare
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp $icc,$labl" %}
+  // Prim = bits 24-22, Secnd = bits 31-30
+  ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
+  ins_pipe(br_cc);
+%}
+
+instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
+  match(If cmp icc);  // unsigned flavour of branchCon. NOTE(review): no size(8) declared here, unlike its siblings - confirm
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp $icc,$labl" %}
+  // Prim = bits 24-22, Secnd = bits 31-30
+  ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
+  ins_pipe(br_cc);
+%}
+
+instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
+  match(If cmp pcc);  // branch on pointer condition codes (Assembler::ptr_cc)
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp $pcc,$labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    // Backward branches are emitted as predicted taken, all others as predicted not taken.
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+
+    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_avoid_back_to_back(AVOID_BEFORE);
+  ins_pipe(br_cc);
+%}
+
+instruct branchConF(cmpOpF cmp, flagsRegF fcc, label labl) %{
+  match(If cmp fcc);  // branch on the floating-point condition code register named by $fcc
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "FBP$cmp $fcc,$labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    // Backward branches are emitted as predicted taken, all others as predicted not taken.
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+
+    __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_avoid_back_to_back(AVOID_BEFORE);
+  ins_pipe(br_fcc);
+%}
+
+instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
+  match(CountedLoopEnd cmp icc);  // same encoding as branchCon, matched on CountedLoopEnd
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp $icc,$labl\t! Loop end" %}
+  // Prim = bits 24-22, Secnd = bits 31-30
+  ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
+  ins_pipe(br_cc);
+%}
+
+instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
+  match(CountedLoopEnd cmp icc);  // unsigned flavour of branchLoopEnd
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp $icc,$labl\t! Loop end" %}
+  // Prim = bits 24-22, Secnd = bits 31-30
+  ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
+  ins_pipe(br_cc);
+%}
+
+// Compare and branch instructions
+// Each rule below fuses the compare into the If: CMP, BP and a delay-slot NOP
+// (size 12). The branch is predicted taken only when it is a backward branch.
+instruct cmpI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));  // int register/register compare, branch on icc
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! int\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpI_imm_branch(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));  // int register/immI5 compare, branch on icc
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! int\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+instruct cmpU_reg_branch(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));  // unsigned int register/register compare, branch on icc
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! unsigned\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpU_imm_branch(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));  // unsigned int register/immI5 compare, branch on icc
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! unsigned\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+instruct cmpUL_reg_branch(cmpOpU cmp, iRegL op1, iRegL op2, label labl, flagsRegUL xcc) %{
+  match(If cmp (CmpUL op1 op2));  // unsigned long register/register compare, branch on xcc
+  effect(USE labl, KILL xcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! unsigned long\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpUL_imm_branch(cmpOpU cmp, iRegL op1, immL5 op2, label labl, flagsRegUL xcc) %{
+  match(If cmp (CmpUL op1 op2));  // unsigned long register/immL5 compare, branch on xcc
+  effect(USE labl, KILL xcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! unsigned long\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+instruct cmpL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{
+  match(If cmp (CmpL op1 op2));  // long register/register compare, branch on xcc
+  effect(USE labl, KILL xcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! long\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpL_imm_branch(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{
+  match(If cmp (CmpL op1 op2));  // long register/immL5 compare, branch on xcc
+  effect(USE labl, KILL xcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! long\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+// Compare Pointers and branch
+instruct cmpP_reg_branch(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{
+  match(If cmp (CmpP op1 op2));  // NOTE(review): format prints "B$cmp" although the encoding emits bp() like the BP$cmp siblings
+  effect(USE labl, KILL pcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! ptr\n\t"
+            "B$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpP_null_branch(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{
+  match(If cmp (CmpP op1 null));  // pointer-versus-null test: compares op1 with G0
+  effect(USE labl, KILL pcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,0\t! ptr\n\t"
+            "B$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, G0);
+    // bpr() is not used here since it has shorter distance.
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpN_reg_branch(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpN op1 op2));  // narrow-oop register/register compare, branch on icc
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! compressed ptr\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpN_null_branch(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{
+  match(If cmp (CmpN op1 null));  // narrow-oop-versus-null test: compares op1 with G0
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,0\t! compressed ptr\n\t"
+            "BP$cmp $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, G0);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+// Loop back branch
+instruct cmpI_reg_branchLoopEnd(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));  // same emission as cmpI_reg_branch, matched on CountedLoopEnd
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! int\n\t"
+            "BP$cmp $labl\t! Loop end" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpI_imm_branchLoopEnd(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));  // same emission as cmpI_imm_branch, matched on CountedLoopEnd
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP $op1,$op2\t! int\n\t"
+            "BP$cmp $labl\t! Loop end" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+// Short compare and branch instructions
+// These are the UseCBCond variants: one cbcond instruction, no delay slot.
+instruct cmpI_reg_branch_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));  // short form of cmpI_reg_branch
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4); // Assuming no NOP inserted.
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp $op1,$op2,$labl\t! int" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpI_imm_branch_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));  // short form of cmpI_imm_branch
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4); // Assuming no NOP inserted.
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp $op1,$op2,$labl\t! int" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+  ins_pipe(cbcond_reg_imm);
+%}
+
+instruct cmpU_reg_branch_short(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));  // short form of cmpU_reg_branch
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4); // Assuming no NOP inserted.
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpU_imm_branch_short(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));  // short form of cmpU_imm_branch
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4); // Assuming no NOP inserted.
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+  ins_pipe(cbcond_reg_imm);
+%}
+
+instruct cmpUL_reg_branch_short(cmpOpU cmp, iRegL op1, iRegL op2, label labl, flagsRegUL xcc) %{
+  match(If cmp (CmpUL op1 op2));  // short form of cmpUL_reg_branch (cbcond on xcc)
+  predicate(UseCBCond);
+  effect(USE labl, KILL xcc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CXB$cmp $op1,$op2,$labl\t! unsigned long" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpUL_imm_branch_short(cmpOpU cmp, iRegL op1, immL5 op2, label labl, flagsRegUL xcc) %{
+  match(If cmp (CmpUL op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL xcc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CXB$cmp $op1,$op2,$labl\t!
unsigned long" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_imm); +%} + +instruct cmpL_reg_branch_short(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{ + match(If cmp (CmpL op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL xcc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CXB$cmp $op1,$op2,$labl\t! long" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpL_imm_branch_short(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{ + match(If cmp (CmpL op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL xcc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CXB$cmp $op1,$op2,$labl\t! long" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_imm); +%} + +// Compare Pointers and branch +instruct cmpP_reg_branch_short(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{ + match(If cmp (CmpP op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL pcc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CXB$cmp $op1,$op2,$labl\t! 
ptr" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpP_null_branch_short(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{ + match(If cmp (CmpP op1 null)); + predicate(UseCBCond); + effect(USE labl, KILL pcc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CXB$cmp $op1,0,$labl\t! ptr" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, G0, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{ + match(If cmp (CmpN op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! compressed ptr" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{ + match(If cmp (CmpN op1 null)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,0,$labl\t! 
compressed ptr" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, G0, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_reg); +%} + +// Loop back branch +instruct cmpI_reg_branchLoopEnd_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{ + match(CountedLoopEnd cmp (CmpI op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! Loop end" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpI_imm_branchLoopEnd_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{ + match(CountedLoopEnd cmp (CmpI op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); // Assuming no NOP inserted. + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! Loop end" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); + ins_pipe(cbcond_reg_imm); +%} + +// Branch-on-register tests all 64 bits. We assume that values +// in 64-bit registers always remains zero or sign extended +// unless our code munges the high bits. Interrupts can chop +// the high order bits to zero or sign at any time. 
+instruct branchCon_regI(cmpOp_reg cmp, iRegI op1, immI0 zero, label labl) %{ + match(If cmp (CmpI op1 zero)); + predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BR$cmp $op1,$labl" %} + ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(br_reg); +%} + +instruct branchCon_regP(cmpOp_reg cmp, iRegP op1, immP0 null, label labl) %{ + match(If cmp (CmpP op1 null)); + predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BR$cmp $op1,$labl" %} + ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(br_reg); +%} + +instruct branchCon_regL(cmpOp_reg cmp, iRegL op1, immL0 zero, label labl) %{ + match(If cmp (CmpL op1 zero)); + predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BR$cmp $op1,$labl" %} + ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(br_reg); +%} + + +// ============================================================================ +// Long Compare +// +// Currently we hold longs in 2 registers. Comparing such values efficiently +// is tricky. The flavor of compare used depends on whether we are testing +// for LT, LE, or EQ. For a simple LT test we can check just the sign bit. +// The GE test is the negated LT test. The LE test can be had by commuting +// the operands (yielding a GE test) and then negating; negate again for the +// GT test. The EQ test is done by ORcc'ing the high and low halves, and the +// NE test is negated from that. + +// Due to a shortcoming in the ADLC, it mixes up expressions like: +// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the +// difference between 'Y' and '0L'. 
The tree-matches for the CmpI sections +// are collapsed internally in the ADLC's dfa-gen code. The match for +// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the +// foo match ends up with the wrong leaf. One fix is to not match both +// reg-reg and reg-zero forms of long-compare. This is unfortunate because +// both forms beat the trinary form of long-compare and both are very useful +// on Intel which has so few registers. + +instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BP$cmp $xcc,$labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(br_cc); +%} + +instruct branchConU_long(cmpOpU cmp, flagsRegUL xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BP$cmp $xcc,$labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(br_cc); +%} + +// Manifest a CmpL3 result in an integer register. Very painful. +// This is the test to avoid. +instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{ + match(Set dst (CmpL3 src1 src2) ); + effect( KILL ccr ); + ins_cost(6*DEFAULT_COST); + size(24); + format %{ "CMP $src1,$src2\t\t! long\n" + "\tBLT,a,pn done\n" + "\tMOV -1,$dst\t! delay slot\n" + "\tBGT,a,pn done\n" + "\tMOV 1,$dst\t! 
delay slot\n" + "\tCLR $dst\n" + "done:" %} + ins_encode( cmpl_flag(src1,src2,dst) ); + ins_pipe(cmpL_reg); +%} + +// Conditional move +instruct cmovLL_reg(cmpOp cmp, flagsRegL xcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + ins_cost(150); + format %{ "MOV$cmp $xcc,$src,$dst\t! long" %} + ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(ialu_reg); +%} + +instruct cmovLL_imm(cmpOp cmp, flagsRegL xcc, iRegL dst, immL0 src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + ins_cost(140); + format %{ "MOV$cmp $xcc,$src,$dst\t! long" %} + ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(ialu_imm); +%} + +instruct cmovIL_reg(cmpOp cmp, flagsRegL xcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + ins_cost(150); + format %{ "MOV$cmp $xcc,$src,$dst" %} + ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(ialu_reg); +%} + +instruct cmovIL_imm(cmpOp cmp, flagsRegL xcc, iRegI dst, immI11 src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + ins_cost(140); + format %{ "MOV$cmp $xcc,$src,$dst" %} + ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(ialu_imm); +%} + +instruct cmovNL_reg(cmpOp cmp, flagsRegL xcc, iRegN dst, iRegN src) %{ + match(Set dst (CMoveN (Binary cmp xcc) (Binary dst src))); + ins_cost(150); + format %{ "MOV$cmp $xcc,$src,$dst" %} + ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(ialu_reg); +%} + +instruct cmovPL_reg(cmpOp cmp, flagsRegL xcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + ins_cost(150); + format %{ "MOV$cmp $xcc,$src,$dst" %} + ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(ialu_reg); +%} + +instruct cmovPL_imm(cmpOp cmp, flagsRegL xcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + ins_cost(140); + format %{ "MOV$cmp 
$xcc,$src,$dst" %} + ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(ialu_imm); +%} + +instruct cmovFL_reg(cmpOp cmp, flagsRegL xcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); + ins_cost(150); + opcode(0x101); + format %{ "FMOVS$cmp $xcc,$src,$dst" %} + ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDL_reg(cmpOp cmp, flagsRegL xcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); + ins_cost(150); + opcode(0x102); + format %{ "FMOVD$cmp $xcc,$src,$dst" %} + ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::xcc)) ); + ins_pipe(int_conditional_float_move); +%} + +// ============================================================================ +// Safepoint Instruction +instruct safePoint_poll(iRegP poll) %{ + match(SafePoint poll); + effect(USE poll); + + size(4); + format %{ "LDX [$poll],R_G0\t! Safepoint: poll for GC" %} + ins_encode %{ + __ relocate(relocInfo::poll_type); + __ ld_ptr($poll$$Register, 0, G0); + %} + ins_pipe(loadPollP); +%} + +// ============================================================================ +// Call Instructions +// Call Java Static Instruction +instruct CallStaticJavaDirect( method meth ) %{ + match(CallStaticJava); + predicate(! 
((CallStaticJavaNode*)n)->is_method_handle_invoke()); + effect(USE meth); + + size(8); + ins_cost(CALL_COST); + format %{ "CALL,static ; NOP ==> " %} + ins_encode( Java_Static_Call( meth ), call_epilog ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(simple_call); +%} + +// Call Java Static Instruction (method handle version) +instruct CallStaticJavaHandle(method meth, l7RegP l7_mh_SP_save) %{ + match(CallStaticJava); + predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); + effect(USE meth, KILL l7_mh_SP_save); + + size(16); + ins_cost(CALL_COST); + format %{ "CALL,static/MethodHandle" %} + ins_encode(preserve_SP, Java_Static_Call(meth), restore_SP, call_epilog); + ins_pipe(simple_call); +%} + +// Call Java Dynamic Instruction +instruct CallDynamicJavaDirect( method meth ) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(CALL_COST); + format %{ "SET (empty),R_G5\n\t" + "CALL,dynamic ; NOP ==> " %} + ins_encode( Java_Dynamic_Call( meth ), call_epilog ); + ins_pipe(call); +%} + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth, l7RegP l7) %{ + match(CallRuntime); + effect(USE meth, KILL l7); + ins_cost(CALL_COST); + format %{ "CALL,runtime" %} + ins_encode( Java_To_Runtime( meth ), + call_epilog, adjust_long_from_native_call ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(simple_call); +%} + +// Call runtime without safepoint - same as CallRuntime +instruct CallLeafDirect(method meth, l7RegP l7) %{ + match(CallLeaf); + effect(USE meth, KILL l7); + ins_cost(CALL_COST); + format %{ "CALL,runtime leaf" %} + ins_encode( Java_To_Runtime( meth ), + call_epilog, + adjust_long_from_native_call ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(simple_call); +%} + +// Call runtime without safepoint - same as CallLeaf +instruct CallLeafNoFPDirect(method meth, l7RegP l7) %{ + match(CallLeafNoFP); + effect(USE meth, KILL l7); + ins_cost(CALL_COST); + format %{ "CALL,runtime leaf nofp" %} + ins_encode( Java_To_Runtime( meth 
), + call_epilog, + adjust_long_from_native_call ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(simple_call); +%} + +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(g3RegP jump_target, inline_cache_regP method_oop) %{ + match(TailCall jump_target method_oop ); + + ins_cost(CALL_COST); + format %{ "Jmp $jump_target ; NOP \t! $method_oop holds method oop" %} + ins_encode(form_jmpl(jump_target)); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(tail_call); +%} + + +// Return Instruction +instruct Ret() %{ + match(Return); + + // The epilogue node did the ret already. + size(0); + format %{ "! return" %} + ins_encode(); + ins_pipe(empty); +%} + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a +// "restore" before this instruction (in Epilogue), we need to materialize it +// in %i0. +instruct tailjmpInd(g1RegP jump_target, i0RegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(CALL_COST); + format %{ "! discard R_O7\n\t" + "Jmp $jump_target ; ADD O7,8,O1 \t! $ex_oop holds exc. oop" %} + ins_encode(form_jmpl_set_exception_pc(jump_target)); + // opcode(Assembler::jmpl_op3, Assembler::arith_op); + // The hack duplicates the exception oop into G3, so that CreateEx can use it there. + // ins_encode( form3_rs1_simm13_rd( jump_target, 0x00, R_G0 ), move_return_pc_to_o1() ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(tail_call); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. 
+instruct CreateException( o0RegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + ins_cost(0); + + size(0); + // Format text only: the rule has size 0 and an empty encoding, so nothing is emitted + format %{ "! exception oop is in R_O0; no code emitted" %} + ins_encode(); + ins_pipe(empty); +%} + + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + ins_cost(CALL_COST); + + // Format text for the jump produced by the enc_rethrow encoding + format %{ "Jmp rethrow_stub" %} + ins_encode(enc_rethrow); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(tail_call); +%} + + +// Die now: a Halt node is encoded as a single 4-byte ILLTRAP +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(CALL_COST); + + size(4); + // Format text for the trap produced by the form2_illtrap encoding + format %{ "ILLTRAP ; ShouldNotReachHere" %} + ins_encode( form2_illtrap() ); + ins_pipe(tail_call); +%} + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// non-zero for a miss and zero for a hit. The encoding ALSO sets flags. 
+instruct partialSubtypeCheck( o0RegP index, o1RegP sub, o2RegP super, flagsRegP pcc, o7RegP o7 ) %{ + match(Set index (PartialSubtypeCheck sub super)); + effect( KILL pcc, KILL o7 ); + ins_cost(DEFAULT_COST*10); + format %{ "CALL PartialSubtypeCheck\n\tNOP" %} + ins_encode( enc_PartialSubtypeCheck() ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(partial_subtype_check_pipe); +%} + +instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{ + match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero)); + effect( KILL idx, KILL o7 ); + ins_cost(DEFAULT_COST*10); + format %{ "CALL PartialSubtypeCheck\n\tNOP\t# (sets condition codes)" %} + ins_encode( enc_PartialSubtypeCheck() ); + ins_avoid_back_to_back(AVOID_BEFORE); + ins_pipe(partial_subtype_check_pipe); +%} + + +// ============================================================================ +// Inlined locking and unlocking. Both rules define the pointer condition codes (pcc); box is fixed to O1 and is USE_KILLed, scratch (O7) is killed and scratch2 is a TEMP. + +instruct cmpFastLock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{ + match(Set pcc (FastLock object box)); + + effect(TEMP scratch2, USE_KILL box, KILL scratch); + ins_cost(100); + + format %{ "FASTLOCK $object,$box\t! kills $box,$scratch,$scratch2" %} + ins_encode( Fast_Lock(object, box, scratch, scratch2) ); + ins_pipe(long_memory_op); +%} + + +instruct cmpFastUnlock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{ + match(Set pcc (FastUnlock object box)); + effect(TEMP scratch2, USE_KILL box, KILL scratch); + ins_cost(100); + + format %{ "FASTUNLOCK $object,$box\t! kills $box,$scratch,$scratch2" %} + ins_encode( Fast_Unlock(object, box, scratch, scratch2) ); + ins_pipe(long_memory_op); +%} + +// The encodings are generic. 
+instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{ + predicate(!use_block_zeroing(n->in(2)) ); + match(Set dummy (ClearArray cnt base)); + effect(TEMP temp, KILL ccr); + ins_cost(300); + format %{ "MOV $cnt,$temp\n" + "loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n" + " BRge loop\t\t! Clearing loop\n" + " STX G0,[$base+$temp]\t! delay slot" %} + + ins_encode %{ + // Compiler ensures base is doubleword aligned and cnt is count of doublewords + Register nof_bytes_arg = $cnt$$Register; + Register nof_bytes_tmp = $temp$$Register; + Register base_pointer_arg = $base$$Register; + + Label loop; + __ mov(nof_bytes_arg, nof_bytes_tmp); + + // Loop and clear, walking backwards through the array. + // nof_bytes_tmp (if >0) is always the number of bytes to zero + __ bind(loop); + __ deccc(nof_bytes_tmp, 8); + __ br(Assembler::greaterEqual, true, Assembler::pt, loop); + __ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp); + // %%%% this mini-loop must not cross a cache boundary! + %} + ins_pipe(long_memory_op); +%} + +instruct clear_array_bis(g1RegX cnt, o0RegP base, Universe dummy, flagsReg ccr) %{ + predicate(use_block_zeroing(n->in(2)) && Assembler::is_simm13((int)BlockZeroingLowLimit)); // complement of clear_array_bis_2: this rule has no temp register (G0 is passed below) + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base, KILL ccr); + ins_cost(300); + format %{ "CLEAR [$base, $cnt]\t! ClearArray" %} + + ins_encode %{ + + assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation"); + Register to = $base$$Register; + Register count = $cnt$$Register; + + Label Ldone; + __ nop(); // Separate short branches + // Use BIS for zeroing (temp is not used). 
+ __ bis_zeroing(to, count, G0, Ldone); + __ bind(Ldone); + + %} + ins_pipe(long_memory_op); +%} + +instruct clear_array_bis_2(g1RegX cnt, o0RegP base, iRegX tmp, Universe dummy, flagsReg ccr) %{ + predicate(use_block_zeroing(n->in(2)) && !Assembler::is_simm13((int)BlockZeroingLowLimit)); + match(Set dummy (ClearArray cnt base)); + effect(TEMP tmp, USE_KILL cnt, USE_KILL base, KILL ccr); + ins_cost(300); + format %{ "CLEAR [$base, $cnt]\t! ClearArray" %} + + ins_encode %{ + + assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation"); + Register to = $base$$Register; + Register count = $cnt$$Register; + Register temp = $tmp$$Register; + + Label Ldone; + __ nop(); // Separate short branches + // Use BIS for zeroing + __ bis_zeroing(to, count, temp, Ldone); + __ bind(Ldone); + + %} + ins_pipe(long_memory_op); +%} + +instruct string_compareL(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result, + o7RegI tmp, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp); + ins_cost(300); + format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp$$Register, $tmp$$Register, + $result$$Register, StrIntrinsicNode::LL); + %} + ins_pipe(long_memory_op); +%} + +instruct string_compareU(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result, + o7RegI tmp, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp); + ins_cost(300); + format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp" %} + 
ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp$$Register, $tmp$$Register, + $result$$Register, StrIntrinsicNode::UU); + %} + ins_pipe(long_memory_op); +%} + +instruct string_compareLU(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result, + o7RegI tmp1, g1RegI tmp2, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp1, KILL tmp2); + ins_cost(300); + format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1,$tmp2" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $result$$Register, StrIntrinsicNode::LU); + %} + ins_pipe(long_memory_op); +%} + +instruct string_compareUL(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result, + o7RegI tmp1, g1RegI tmp2, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp1, KILL tmp2); + ins_cost(300); + format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1,$tmp2" %} + ins_encode %{ + __ string_compare($str2$$Register, $str1$$Register, + $cnt2$$Register, $cnt1$$Register, + $tmp1$$Register, $tmp2$$Register, + $result$$Register, StrIntrinsicNode::UL); + %} + ins_pipe(long_memory_op); +%} + +instruct string_equalsL(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result, + o7RegI tmp, flagsReg ccr) %{ + predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp, KILL ccr); + ins_cost(300); + 
format %{ "String Equals byte[] $str1,$str2,$cnt -> $result // KILL $tmp" %} + ins_encode %{ + __ array_equals(false, $str1$$Register, $str2$$Register, + $cnt$$Register, $tmp$$Register, + $result$$Register, true /* byte */); + %} + ins_pipe(long_memory_op); +%} + +instruct string_equalsU(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result, + o7RegI tmp, flagsReg ccr) %{ + predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp, KILL ccr); + ins_cost(300); + format %{ "String Equals char[] $str1,$str2,$cnt -> $result // KILL $tmp" %} + ins_encode %{ + __ array_equals(false, $str1$$Register, $str2$$Register, + $cnt$$Register, $tmp$$Register, + $result$$Register, false /* byte */); + %} + ins_pipe(long_memory_op); +%} + +instruct array_equalsB(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result, + o7RegI tmp2, flagsReg ccr) %{ + predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); + ins_cost(300); + format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1,$tmp2" %} + ins_encode %{ + __ array_equals(true, $ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, + $result$$Register, true /* byte */); + %} + ins_pipe(long_memory_op); +%} + +instruct array_equalsC(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result, + o7RegI tmp2, flagsReg ccr) %{ + predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); + ins_cost(300); + format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1,$tmp2" %} + ins_encode %{ + __ array_equals(true, $ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, + $result$$Register, false /* byte */); + %} + ins_pipe(long_memory_op); 
+%} + +instruct has_negatives(o0RegP pAryR, g3RegI iSizeR, notemp_iRegI resultR, + iRegL tmp1L, iRegL tmp2L, iRegL tmp3L, iRegL tmp4L, + flagsReg ccr) +%{ + match(Set resultR (HasNegatives pAryR iSizeR)); + effect(TEMP resultR, TEMP tmp1L, TEMP tmp2L, TEMP tmp3L, TEMP tmp4L, USE pAryR, USE iSizeR, KILL ccr); + format %{ "has negatives byte[] $pAryR,$iSizeR -> $resultR // KILL $tmp1L,$tmp2L,$tmp3L,$tmp4L" %} + ins_encode %{ + __ has_negatives($pAryR$$Register, $iSizeR$$Register, + $resultR$$Register, + $tmp1L$$Register, $tmp2L$$Register, + $tmp3L$$Register, $tmp4L$$Register); + %} + ins_pipe(long_memory_op); +%} + +// char[] to byte[] compression +instruct string_compress(o0RegP src, o1RegP dst, g3RegI len, notemp_iRegI result, iRegL tmp, flagsReg ccr) %{ + predicate(UseVIS < 3); + match(Set result (StrCompressedCopy src (Binary dst len))); + effect(TEMP result, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr); + ins_cost(300); + format %{ "String Compress $src,$dst,$len -> $result // KILL $tmp" %} + ins_encode %{ + Label Ldone; + __ signx($len$$Register); + __ cmp_zero_and_br(Assembler::zero, $len$$Register, Ldone, false, Assembler::pn); + __ delayed()->mov($len$$Register, $result$$Register); // copy count + __ string_compress($src$$Register, $dst$$Register, $len$$Register, $result$$Register, $tmp$$Register, Ldone); + __ bind(Ldone); + %} + ins_pipe(long_memory_op); +%} + +// fast char[] to byte[] compression using VIS instructions +instruct string_compress_fast(o0RegP src, o1RegP dst, g3RegI len, notemp_iRegI result, + iRegL tmp1, iRegL tmp2, iRegL tmp3, iRegL tmp4, + regD ftmp1, regD ftmp2, regD ftmp3, flagsReg ccr) %{ + predicate(UseVIS >= 3); + match(Set result (StrCompressedCopy src (Binary dst len))); + effect(TEMP result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ftmp1, TEMP ftmp2, TEMP ftmp3, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr); + ins_cost(300); + format %{ "String Compress Fast $src,$dst,$len -> $result // KILL 
$tmp1,$tmp2,$tmp3,$tmp4,$ftmp1,$ftmp2,$ftmp3" %} + ins_encode %{ + Label Ldone; + __ signx($len$$Register); + __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, + $ftmp1$$FloatRegister, $ftmp2$$FloatRegister, $ftmp3$$FloatRegister, Ldone); + __ cmp_and_brx_short($len$$Register, 0, Assembler::equal, Assembler::pn, Ldone); + __ string_compress($src$$Register, $dst$$Register, $len$$Register, $result$$Register, $tmp1$$Register, Ldone); + __ bind(Ldone); + %} + ins_pipe(long_memory_op); +%} + +// byte[] to char[] inflation +instruct string_inflate(Universe dummy, o0RegP src, o1RegP dst, g3RegI len, + iRegL tmp, flagsReg ccr) %{ + match(Set dummy (StrInflatedCopy src (Binary dst len))); + effect(TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr); + ins_cost(300); + format %{ "String Inflate $src,$dst,$len // KILL $tmp" %} + ins_encode %{ + Label Ldone; + __ signx($len$$Register); + __ cmp_and_brx_short($len$$Register, 0, Assembler::equal, Assembler::pn, Ldone); + __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register, Ldone); + __ bind(Ldone); + %} + ins_pipe(long_memory_op); +%} + +// fast byte[] to char[] inflation using VIS instructions +instruct string_inflate_fast(Universe dummy, o0RegP src, o1RegP dst, g3RegI len, + iRegL tmp, regD ftmp1, regD ftmp2, regD ftmp3, regD ftmp4, flagsReg ccr) %{ + predicate(UseVIS >= 3); + match(Set dummy (StrInflatedCopy src (Binary dst len))); + effect(TEMP tmp, TEMP ftmp1, TEMP ftmp2, TEMP ftmp3, TEMP ftmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr); + ins_cost(300); + format %{ "String Inflate Fast $src,$dst,$len // KILL $tmp,$ftmp1,$ftmp2,$ftmp3,$ftmp4" %} + ins_encode %{ + Label Ldone; + __ signx($len$$Register); + __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register, + $ftmp1$$FloatRegister, $ftmp2$$FloatRegister, $ftmp3$$FloatRegister, 
$ftmp4$$FloatRegister, Ldone); + __ cmp_and_brx_short($len$$Register, 0, Assembler::equal, Assembler::pn, Ldone); + __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register, Ldone); + __ bind(Ldone); + %} + ins_pipe(long_memory_op); +%} + + +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountLeadingZerosI src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // x |= (x >> 16); + // return (WORDBITS - popc(x)); + format %{ "SRL $src,1,$tmp\t! count leading zeros (int)\n\t" + "SRL $src,0,$dst\t! 32-bit zero extend\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,2,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,4,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,8,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,16,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "POPC $dst,$dst\n\t" + "MOV 32,$tmp\n\t" + "SUB $tmp,$dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + __ srl(Rsrc, 1, Rtmp); + __ srl(Rsrc, 0, Rdst); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 2, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 4, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 8, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 16, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ popc(Rdst, Rdst); + __ mov(BitsPerInt, Rtmp); + __ sub(Rtmp, Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(iRegIsafe dst, iRegL src, iRegL tmp, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountLeadingZerosL src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // x |= (x >> 16); + // x 
|= (x >> 32); + // return (WORDBITS - popc(x)); + format %{ "SRLX $src,1,$tmp\t! count leading zeros (long)\n\t" + "OR $src,$tmp,$dst\n\t" + "SRLX $dst,2,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,4,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,8,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,16,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,32,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "POPC $dst,$dst\n\t" + "MOV 64,$tmp\n\t" + "SUB $tmp,$dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + __ srlx(Rsrc, 1, Rtmp); + __ or3( Rsrc, Rtmp, Rdst); + __ srlx(Rdst, 2, Rtmp); + __ or3( Rdst, Rtmp, Rdst); + __ srlx(Rdst, 4, Rtmp); + __ or3( Rdst, Rtmp, Rdst); + __ srlx(Rdst, 8, Rtmp); + __ or3( Rdst, Rtmp, Rdst); + __ srlx(Rdst, 16, Rtmp); + __ or3( Rdst, Rtmp, Rdst); + __ srlx(Rdst, 32, Rtmp); + __ or3( Rdst, Rtmp, Rdst); + __ popc(Rdst, Rdst); + __ mov(BitsPerLong, Rtmp); + __ sub(Rtmp, Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountTrailingZerosI src)); + effect(TEMP dst, KILL cr); + + // return popc(~x & (x - 1)); + format %{ "SUB $src,1,$dst\t! count trailing zeros (int)\n\t" + "ANDN $dst,$src,$dst\n\t" + "SRL $dst,R_G0,$dst\n\t" + "POPC $dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ sub(Rsrc, 1, Rdst); + __ andn(Rdst, Rsrc, Rdst); + __ srl(Rdst, G0, Rdst); + __ popc(Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(iRegIsafe dst, iRegL src, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountTrailingZerosL src)); + effect(TEMP dst, KILL cr); + + // return popc(~x & (x - 1)); + format %{ "SUB $src,1,$dst\t! 
count trailing zeros (long)\n\t" + "ANDN $dst,$src,$dst\n\t" + "POPC $dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ sub(Rsrc, 1, Rdst); + __ andn(Rdst, Rsrc, Rdst); + __ popc(Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + + +//---------- Population Count Instructions ------------------------------------- + +instruct popCountI(iRegIsafe dst, iRegI src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + format %{ "SRL $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t" + "POPC $dst, $dst" %} + ins_encode %{ + __ srl($src$$Register, G0, $dst$$Register); + __ popc($dst$$Register, $dst$$Register); + %} + ins_pipe(ialu_reg); +%} + +// Note: Long.bitCount(long) returns an int. +instruct popCountL(iRegIsafe dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + format %{ "POPC $src, $dst" %} + ins_encode %{ + __ popc($src$$Register, $dst$$Register); + %} + ins_pipe(ialu_reg); +%} + + +// ============================================================================ +//------------Bytes reverse-------------------------------------------------- + +instruct bytes_reverse_int(iRegI dst, stackSlotI src) %{ + match(Set dst (ReverseBytesI src)); + + // Op cost is artificially doubled to make sure that load or store + // instructions are preferred over this one which requires a spill + // onto a stack slot. + ins_cost(2*DEFAULT_COST + MEMORY_REF_COST); + format %{ "LDUWA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ set($src$$disp + STACK_BIAS, O7); + __ lduwa($src$$base$$Register, O7, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe( iload_mem ); +%} + +instruct bytes_reverse_long(iRegL dst, stackSlotL src) %{ + match(Set dst (ReverseBytesL src)); + + // Op cost is artificially doubled to make sure that load or store + // instructions are preferred over this one which requires a spill + // onto a stack slot. 
+ ins_cost(2*DEFAULT_COST + MEMORY_REF_COST); + format %{ "LDXA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ set($src$$disp + STACK_BIAS, O7); + __ ldxa($src$$base$$Register, O7, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe( iload_mem ); +%} + +instruct bytes_reverse_unsigned_short(iRegI dst, stackSlotI src) %{ + match(Set dst (ReverseBytesUS src)); + + // Op cost is artificially doubled to make sure that load or store + // instructions are preferred over this one which requires a spill + // onto a stack slot. + ins_cost(2*DEFAULT_COST + MEMORY_REF_COST); + format %{ "LDUHA $src, $dst\t!asi=primary_little\n\t" %} + + ins_encode %{ + // the value was spilled as an int so bias the load + __ set($src$$disp + STACK_BIAS + 2, O7); + __ lduha($src$$base$$Register, O7, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe( iload_mem ); +%} + +instruct bytes_reverse_short(iRegI dst, stackSlotI src) %{ + match(Set dst (ReverseBytesS src)); + + // Op cost is artificially doubled to make sure that load or store + // instructions are preferred over this one which requires a spill + // onto a stack slot. 
+ ins_cost(2*DEFAULT_COST + MEMORY_REF_COST); + format %{ "LDSHA $src, $dst\t!asi=primary_little\n\t" %} + + ins_encode %{ + // the value was spilled as an int so bias the load + __ set($src$$disp + STACK_BIAS + 2, O7); + __ ldsha($src$$base$$Register, O7, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe( iload_mem ); +%} + +// Load Integer reversed byte order +instruct loadI_reversed(iRegI dst, indIndexMemory src) %{ + match(Set dst (ReverseBytesI (LoadI src))); + + ins_cost(DEFAULT_COST + MEMORY_REF_COST); + size(4); + format %{ "LDUWA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ lduwa($src$$base$$Register, $src$$index$$Register, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Long - aligned and reversed +instruct loadL_reversed(iRegL dst, indIndexMemory src) %{ + match(Set dst (ReverseBytesL (LoadL src))); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "LDXA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ ldxa($src$$base$$Register, $src$$index$$Register, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load unsigned short / char reversed byte order +instruct loadUS_reversed(iRegI dst, indIndexMemory src) %{ + match(Set dst (ReverseBytesUS (LoadUS src))); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "LDUHA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ lduha($src$$base$$Register, $src$$index$$Register, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load short reversed byte order +instruct loadS_reversed(iRegI dst, indIndexMemory src) %{ + match(Set dst (ReverseBytesS (LoadS src))); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "LDSHA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ ldsha($src$$base$$Register, $src$$index$$Register, Assembler::ASI_PRIMARY_LITTLE, $dst$$Register); + %} + ins_pipe(iload_mem); +%} + +// Store Integer reversed byte order +instruct 
storeI_reversed(indIndexMemory dst, iRegI src) %{ + match(Set dst (StoreI dst (ReverseBytesI src))); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STWA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ stwa($src$$Register, $dst$$base$$Register, $dst$$index$$Register, Assembler::ASI_PRIMARY_LITTLE); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Long reversed byte order +instruct storeL_reversed(indIndexMemory dst, iRegL src) %{ + match(Set dst (StoreL dst (ReverseBytesL src))); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STXA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ stxa($src$$Register, $dst$$base$$Register, $dst$$index$$Register, Assembler::ASI_PRIMARY_LITTLE); + %} + ins_pipe(istore_mem_reg); +%} + +// Store unsigned short/char reversed byte order +instruct storeUS_reversed(indIndexMemory dst, iRegI src) %{ + match(Set dst (StoreC dst (ReverseBytesUS src))); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STHA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ stha($src$$Register, $dst$$base$$Register, $dst$$index$$Register, Assembler::ASI_PRIMARY_LITTLE); + %} + ins_pipe(istore_mem_reg); +%} + +// Store short reversed byte order +instruct storeS_reversed(indIndexMemory dst, iRegI src) %{ + match(Set dst (StoreC dst (ReverseBytesS src))); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STHA $src, $dst\t!asi=primary_little" %} + + ins_encode %{ + __ stha($src$$Register, $dst$$base$$Register, $dst$$index$$Register, Assembler::ASI_PRIMARY_LITTLE); + %} + ins_pipe(istore_mem_reg); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load Aligned Packed values into a Double Register +instruct loadV8(regD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "LDDF $mem,$dst\t! 
load vector (8 bytes)" %} + ins_encode %{ + __ ldf(FloatRegisterImpl::D, $mem$$Address, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(floadD_mem); +%} + +// Store Vector in Double register to memory +instruct storeV8(memory mem, regD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STDF $src,$mem\t! store vector (8 bytes)" %} + ins_encode %{ + __ stf(FloatRegisterImpl::D, as_DoubleFloatRegister($src$$reg), $mem$$Address); + %} + ins_pipe(fstoreD_mem_reg); +%} + +// Store Zero into vector in memory +instruct storeV8B_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateB zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (8 bytes)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV4S_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateS zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (4 shorts)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV2I_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateI zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (2 ints)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV2F_zero(memory mem, immF0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateF zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! 
store zero vector (2 floats)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +// Replicate scalar to packed byte values into Double register +instruct Repl8B_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 8 && UseVIS >= 3); + match(Set dst (ReplicateB src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,56,$tmp\n\t" + "SRLX $tmp, 8,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate8B\n\t" + "MOVXTOD $tmp,$dst\t! MoveL2D" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 56, Rtmp); + __ srlx(Rtmp, 8, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed byte values into Double stack +instruct Repl8B_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 8 && UseVIS < 3); + match(Set dst (ReplicateB src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,56,$tmp\n\t" + "SRLX $tmp, 8,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate8B\n\t" + "STX $tmp,$dst\t! 
regL to stkD" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 56, Rtmp); + __ srlx(Rtmp, 8, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ set ($dst$$disp + STACK_BIAS, Rtmp2); + __ stx (Rtmp, Rtmp2, $dst$$base$$Register); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar constant to packed byte values in Double register +instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed char/short values into Double register +instruct Repl4S_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 4 && UseVIS >= 3); + match(Set dst (ReplicateS src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,48,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate4S\n\t" + "MOVXTOD $tmp,$dst\t! 
MoveL2D" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 48, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed char/short values into Double stack +instruct Repl4S_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 4 && UseVIS < 3); + match(Set dst (ReplicateS src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,48,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate4S\n\t" + "STX $tmp,$dst\t! regL to stkD" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 48, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ set ($dst$$disp + STACK_BIAS, Rtmp2); + __ stx (Rtmp, Rtmp2, $dst$$base$$Register); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar constant to packed char/short values in Double register +instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. 
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed int values into Double register +instruct Repl2I_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 2 && UseVIS >= 3); + match(Set dst (ReplicateI src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,32,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate2I\n\t" + "MOVXTOD $tmp,$dst\t! MoveL2D" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 32, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed int values into Double stack +instruct Repl2I_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 2 && UseVIS < 3); + match(Set dst (ReplicateI src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,32,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate2I\n\t" + "STX $tmp,$dst\t! 
regL to stkD" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 32, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ set ($dst$$disp + STACK_BIAS, Rtmp2); + __ stx (Rtmp, Rtmp2, $dst$$base$$Register); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar constant to packed int values in Double register +instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed float values into Double stack +instruct Repl2F_stk(stackSlotD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(MEMORY_REF_COST*2); + format %{ "STF $src,$dst.hi\t! packed2F\n\t" + "STF $src,$dst.lo" %} + opcode(Assembler::stf_op3); + ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, src)); + ins_pipe(fstoreF_stk_reg); +%} + +// Replicate scalar constant to packed float values in Double register +instruct Repl2F_immF(regD dst, immF con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2F($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. 
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immF($con$$constant)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immF($con$$constant)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. 
+// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in the same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( incI_eReg_immI1( 0.dst 1.src 0.src ) ); +// %} +// + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +// peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +// %} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instruction definitions. +// +// SPARC will probably not have any of these rules due to its RISC instruction set. + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architecture's pipeline. 
diff -ur --new-file a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp --- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp 2023-04-16 11:42:11.076659005 +0000 @@ -0,0 +1,5786 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_sparc.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp. + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Note: The register L7 is used as L7_thread_cache, and may not be used +// any other way within this module. 
+ +static const Register& Lstub_temp = L2; + +// ------------------------------------------------------------------------------------------------------------------------- +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + +#ifdef PRODUCT +#define inc_counter_np(a,b,c) +#else +#define inc_counter_np(counter, t1, t2) \ + BLOCK_COMMENT("inc_counter " #counter); \ + __ inc_counter(&counter, t1, t2); +#endif + + //---------------------------------------------------------------------------------------------------- + // Call stubs are used to call Java from C + + address generate_call_stub(address& return_pc) { + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + // Incoming arguments: + // + // o0 : call wrapper address + // o1 : result (address) + // o2 : result type + // o3 : method + // o4 : (interpreter) entry point + // o5 : parameters (address) + // [sp + 0x5c]: parameter size (in words) + // [sp + 0x60]: thread + // + // +---------------+ <--- sp + 0 + // | | + // . reg save area . + // | | + // +---------------+ <--- sp + 0x40 + // | | + // . extra 7 slots . + // | | + // +---------------+ <--- sp + 0x5c + // | param. 
size | + // +---------------+ <--- sp + 0x60 + // | thread | + // +---------------+ + // | | + + // note: if the link argument position changes, adjust + // the code in frame::entry_frame_call_wrapper() + + const Argument link = Argument(0, false); // used only for GC + const Argument result = Argument(1, false); + const Argument result_type = Argument(2, false); + const Argument method = Argument(3, false); + const Argument entry_point = Argument(4, false); + const Argument parameters = Argument(5, false); + const Argument parameter_size = Argument(6, false); + const Argument thread = Argument(7, false); + + // setup thread register + __ ld_ptr(thread.as_address(), G2_thread); + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { const Register t = G3_scratch; + Label L; + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t); + __ br_null_short(t, Assembler::pt, L); + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // create activation frame & allocate space for parameters + { const Register t = G3_scratch; + __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words) + __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words) + __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words) + __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes + __ neg(t); // negate so it can be used with save + __ save(SP, t, SP); // setup new frame + } + + // +---------------+ <--- sp + 0 + // | | + // . reg save area . + // | | + // +---------------+ <--- sp + 0x40 + // | | + // . extra 7 slots . + // | | + // +---------------+ <--- sp + 0x5c + // | empty slot | (only if parameter size is even) + // +---------------+ + // | | + // . parameters . + // | | + // +---------------+ <--- fp + 0 + // | | + // . reg save area . + // | | + // +---------------+ <--- fp + 0x40 + // | | + // . 
extra 7 slots . + // | | + // +---------------+ <--- fp + 0x5c + // | param. size | + // +---------------+ <--- fp + 0x60 + // | thread | + // +---------------+ + // | | + + // pass parameters if any + BLOCK_COMMENT("pass parameters if any"); + { const Register src = parameters.as_in().as_register(); + const Register dst = Lentry_args; + const Register tmp = G3_scratch; + const Register cnt = G4_scratch; + + // test if any parameters & setup of Lentry_args + Label exit; + __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter + __ add( FP, STACK_BIAS, dst ); + __ cmp_zero_and_br(Assembler::zero, cnt, exit); + __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args + + // copy parameters if any + Label loop; + __ BIND(loop); + // Store parameter value + __ ld_ptr(src, 0, tmp); + __ add(src, BytesPerWord, src); + __ st_ptr(tmp, dst, 0); + __ deccc(cnt); + __ br(Assembler::greater, false, Assembler::pt, loop); + __ delayed()->sub(dst, Interpreter::stackElementSize, dst); + + // done + __ BIND(exit); + } + + // setup parameters, method & call Java function +#ifdef ASSERT + // layout_activation_impl checks its notion of saved SP against + // this register, so if this changes update it as well. 
+ const Register saved_SP = Lscratch; + __ mov(SP, saved_SP); // keep track of SP before call +#endif + + // setup parameters + const Register t = G3_scratch; + __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words) + __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes + __ sub(FP, t, Gargs); // setup parameter pointer + __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias + __ mov(SP, O5_savedSP); + + + // do the call + // + // the following register must be setup: + // + // G2_thread + // G5_method + // Gargs + BLOCK_COMMENT("call Java function"); + __ jmpl(entry_point.as_in().as_register(), G0, O7); + __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method + + BLOCK_COMMENT("call_stub_return_address:"); + return_pc = __ pc(); + + // The callee, if it wasn't interpreted, can return with SP changed so + // we can no longer assert of change of SP. + + // store result depending on type + // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE + // is treated as T_INT) + { const Register addr = result .as_in().as_register(); + const Register type = result_type.as_in().as_register(); + Label is_long, is_float, is_double, is_object, exit; + __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object); + __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float); + __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double); + __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long); + __ delayed()->nop(); + + // store int result + __ st(O0, addr, G0); + + __ BIND(exit); + __ ret(); + __ delayed()->restore(); + + __ BIND(is_object); + __ ba(exit); + __ delayed()->st_ptr(O0, addr, G0); + + __ BIND(is_float); + __ ba(exit); + __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); + + __ BIND(is_double); + __ ba(exit); + __ delayed()->stf(FloatRegisterImpl::D, F0, 
addr, G0); + + __ BIND(is_long); + __ ba(exit); + __ delayed()->st_long(O0, addr, G0); // store entire long + } + return start; + } + + + //---------------------------------------------------------------------------------------------------- + // Return point for a Java call if there's an exception thrown in Java code. + // The exception is caught and transformed into a pending exception stored in + // JavaThread that can be tested from within the VM. + // + // Oexception: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + + address start = __ pc(); + // verify that thread corresponds + __ verify_thread(); + + const Register& temp_reg = Gtemp; + Address pending_exception_addr (G2_thread, Thread::pending_exception_offset()); + Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset ()); + Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset ()); + + // set pending exception + __ verify_oop(Oexception); + __ st_ptr(Oexception, pending_exception_addr); + __ set((intptr_t)__FILE__, temp_reg); + __ st_ptr(temp_reg, exception_file_offset_addr); + __ set((intptr_t)__LINE__, temp_reg); + __ st(temp_reg, exception_line_offset_addr); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before"); + + AddressLiteral stub_ret(StubRoutines::_call_stub_return_address); + __ jump_to(stub_ret, temp_reg); + __ delayed()->nop(); + + return start; + } + + + //---------------------------------------------------------------------------------------------------- + // Continuation point for runtime calls returning with a pending exception + // The pending exception check happened in the runtime or native call stub + // The pending exception in Thread is converted into a Java-level exception + // + // Contract with Java-level exception handler: O0 = exception + // O1 = throwing pc + + address generate_forward_exception() { 
+ StubCodeMark mark(this, "StubRoutines", "forward_exception"); + address start = __ pc(); + + // Upon entry, O7 has the return address returning into Java + // (interpreted or compiled) code; i.e. the return address + // becomes the throwing pc. + + const Register& handler_reg = Gtemp; + + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + +#ifdef ASSERT + // make sure that this code is only executed if there is a pending exception + { Label L; + __ ld_ptr(exception_addr, Gtemp); + __ br_notnull_short(Gtemp, Assembler::pt, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into handler_reg + __ get_thread(); + __ ld_ptr(exception_addr, Oexception); + __ verify_oop(Oexception); + __ save_frame(0); // compensates for compiler weakness + __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC + BLOCK_COMMENT("call exception_handler_for_return_address"); + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch); + __ mov(O0, handler_reg); + __ restore(); // compensates for compiler weakness + + __ ld_ptr(exception_addr, Oexception); + __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC + +#ifdef ASSERT + // make sure exception is set + { Label L; + __ br_notnull_short(Oexception, Assembler::pt, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + // jump to exception handler + __ jmp(handler_reg, 0); + // clear pending exception + __ delayed()->st_ptr(G0, exception_addr); + + return start; + } + + //------------------------------------------------------------------------------------------------------------------------ + // Continuation point for throwing of implicit exceptions that are not handled in + // the current activation. 
Fabricates an exception oop and initiates normal + // exception dispatching in this frame. Only callee-saved registers are preserved + // (through the normal register window / RegisterMap handling). + // If the compiler needs all registers to be preserved between the fault + // point and the exception handler then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other implicit + // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are + // either at call sites or otherwise assume that stack unwinding will be initiated, + // so caller saved registers were assumed volatile in the compiler. + + // Note that we generate only this stub into a RuntimeStub, because it needs to be + // properly traversed and ignored during GC, so we change the meaning of the "__" + // macro within this method. +#undef __ +#define __ masm-> + + address generate_throw_exception(const char* name, address runtime_entry, + Register arg1 = noreg, Register arg2 = noreg) { +#ifdef ASSERT + int insts_size = VerifyThread ? 1 * K : 600; +#else + int insts_size = VerifyThread ? 1 * K : 256; +#endif /* ASSERT */ + int locs_size = 32; + + CodeBuffer code(name, insts_size, locs_size); + MacroAssembler* masm = new MacroAssembler(&code); + + __ verify_thread(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of thread-local storage + __ assert_not_delayed(); + + // Note that we always push a frame because on the SPARC + // architecture, for all of our implicit exception kinds at call + // sites, the implicit exception is taken before the callee frame + // is pushed. + __ save_frame(0); + + int frame_complete = __ offset(); + + // Note that we always have a runtime stub frame on the top of stack by this point + Register last_java_sp = SP; + // 64-bit last_java_sp is biased! 
+ __ set_last_Java_frame(last_java_sp, G0); + if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early + __ save_thread(noreg); + if (arg1 != noreg) { + assert(arg2 != O1, "clobbered"); + __ mov(arg1, O1); + } + if (arg2 != noreg) { + __ mov(arg2, O2); + } + // do the call + BLOCK_COMMENT("call runtime_entry"); + __ call(runtime_entry, relocInfo::runtime_call_type); + if (!VerifyThread) + __ delayed()->mov(G2_thread, O0); // pass thread as first argument + else + __ delayed()->nop(); // (thread already passed) + __ restore_thread(noreg); + __ reset_last_Java_frame(); + + // check for pending exceptions. use Gtemp as scratch register. +#ifdef ASSERT + Label L; + + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + Register scratch_reg = Gtemp; + __ ld_ptr(exception_addr, scratch_reg); + __ br_notnull_short(scratch_reg, Assembler::pt, L); + __ should_not_reach_here(); + __ bind(L); +#endif // ASSERT + BLOCK_COMMENT("call forward_exception_entry"); + __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + // we use O7 linkage so that forward_exception_entry has the issuing PC + __ delayed()->restore(); + + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false); + return stub->entry_point(); + } + +#undef __ +#define __ _masm-> + + + // Generate a routine that sets all the registers so we + // can tell if the stop routine prints them correctly. 
+ address generate_test_stop() { + StubCodeMark mark(this, "StubRoutines", "test_stop"); + address start = __ pc(); + + int i; + + __ save_frame(0); + + static jfloat zero = 0.0, one = 1.0; + + // put addr in L0, then load through L0 to F0 + __ set((intptr_t)&zero, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F0); + __ set((intptr_t)&one, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1 + + // use add to put 2..18 in F2..F18 + for ( i = 2; i <= 18; ++i ) { + __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i)); + } + + // Now put double 2 in F16, double 18 in F18 + __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2, F16 ); + __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 ); + + // use add to put doubles 20..30 in F20..F30 (even registers only; the loop stops before 32) + for (i = 20; i < 32; i += 2) { + __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i)); + } + + // put 8..15 in all eight l's; put 0..5 in i0..i5, 16..21 in o0..o5, 24..29 in g0..g5 (registers 6 and 7 of the i/o/g sets are not written) + for ( i = 0; i < 8; ++i ) { + if (i < 6) { + __ set( i, as_iRegister(i)); + __ set(16 + i, as_oRegister(i)); + __ set(24 + i, as_gRegister(i)); + } + __ set( 8 + i, as_lRegister(i)); + } + + __ stop("testing stop"); + + + __ ret(); + __ delayed()->restore(); + + return start; + } + + + address generate_stop_subroutine() { + StubCodeMark mark(this, "StubRoutines", "stop_subroutine"); + address start = __ pc(); + + __ stop_subroutine(); + + return start; + } + + address generate_flush_callers_register_windows() { + StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows"); + address start = __ pc(); + + __ flushw(); + __ retl(false); + __ delayed()->add( FP, STACK_BIAS, O0 ); + // The returned value must be a stack pointer whose register save area + // is flushed, and will stay flushed while the caller executes. 
+ + return start; + } + + // Implementation of jint atomic_xchg(jint exchange_value, volatile jint* dest) + // used by Atomic::xchg(volatile jint* dest, jint exchange_value) + // + // Arguments: + // + // exchange_value: O0 + // dest: O1 + // + // Results: + // + // O0: the value previously stored in dest + // + address generate_atomic_xchg() { + StubCodeMark mark(this, "StubRoutines", "atomic_xchg"); + address start = __ pc(); + + if (UseCASForSwap) { + // Use CAS instead of swap, just in case the MP hardware + // prefers to work with just one kind of synch. instruction. + Label retry; + __ BIND(retry); + __ mov(O0, O3); // scratch copy of exchange value + __ ld(O1, 0, O2); // observe the previous value + // try to replace O2 with O3 + __ cas(O1, O2, O3); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry); + + __ retl(false); + __ delayed()->mov(O2, O0); // report previous value to caller + } else { + __ retl(false); + __ delayed()->swap(O1, 0, O0); + } + + return start; + } + + + // Implementation of jint atomic_cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value) + // used by Atomic::cmpxchg(volatile jint* dest, jint compare_value, jint exchange_value) + // + // Arguments: + // + // exchange_value: O0 + // dest: O1 + // compare_value: O2 + // + // Results: + // + // O0: the value previously stored in dest + // + address generate_atomic_cmpxchg() { + StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg"); + address start = __ pc(); + + // cmpxchg(dest, compare_value, exchange_value) + __ cas(O1, O2, O0); + __ retl(false); + __ delayed()->nop(); + + return start; + } + + // Implementation of jlong atomic_cmpxchg_long(jlong exchange_value, volatile jlong *dest, jlong compare_value) + // used by Atomic::cmpxchg(volatile jlong *dest, jlong compare_value, jlong exchange_value) + // + // Arguments: + // + // exchange_value: O1:O0 + // dest: O2 + // compare_value: O4:O3 + // + // Results: + // + // O1:O0: the value previously 
stored in dest + // + // Overwrites: G1,G2,G3 (NOTE(review): the code below writes only O0, O1, O3 and O4 - confirm) + // + address generate_atomic_cmpxchg_long() { + StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long"); + address start = __ pc(); + + __ sllx(O0, 32, O0); + __ srl(O1, 0, O1); + __ or3(O0,O1,O0); // O0 holds 64-bit value from exchange_value + __ sllx(O3, 32, O3); + __ srl(O4, 0, O4); + __ or3(O3,O4,O3); // O3 holds 64-bit value from compare_value + __ casx(O2, O3, O0); // O2 = dest; O0 is unpacked below as the previous value of dest + __ srl(O0, 0, O1); // unpacked return value in O1:O0 + __ retl(false); + __ delayed()->srlx(O0, 32, O0); + + return start; + } + + + // Implementation of jint atomic_add(jint add_value, volatile jint* dest) + // used by Atomic::add(volatile jint* dest, jint add_value) + // + // Arguments: + // + // add_value: O0 (e.g., +1 or -1) + // dest: O1 + // + // Results: + // + // O0: the new value stored in dest + // + // Overwrites: O2, O3 + // + address generate_atomic_add() { + StubCodeMark mark(this, "StubRoutines", "atomic_add"); + address start = __ pc(); + __ BIND(_atomic_add_stub); + + Label(retry); + __ BIND(retry); + + __ lduw(O1, 0, O2); + __ add(O0, O2, O3); + __ cas(O1, O2, O3); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry); + __ retl(false); + __ delayed()->add(O0, O2, O0); // note that cas made O2==O3 + + return start; + } + Label _atomic_add_stub; // called from other stubs + + + // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super ); + // Arguments : + // + // ret : O0, returned + // icc/xcc: set as O0 (depending on wordSize) + // sub : O1, argument, not changed + // super: O2, argument, not changed + // raddr: O7, blown by call + address generate_partial_subtype_check() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); + address start = __ pc(); + Label miss; + + __ save_frame(0); + Register Rret = I0; + Register Rsub = I1; + Register Rsuper = I2; + + Register L0_ary_len = L0; + Register L1_ary_ptr = L1; + Register L2_super = 
L2; + Register L3_index = L3; + + __ check_klass_subtype_slow_path(Rsub, Rsuper, + L0, L1, L2, L3, + NULL, &miss); + + // Match falls through here. + __ addcc(G0,0,Rret); // set Z flags, Z result + + __ ret(); // Result in Rret is zero; flags set to Z + __ delayed()->restore(); + + __ BIND(miss); + __ addcc(G0,1,Rret); // set NZ flags, NZ result + + __ ret(); // Result in Rret is != 0; flags set to NZ + __ delayed()->restore(); + + return start; + } + + + // Called from MacroAssembler::verify_oop + // + address generate_verify_oop_subroutine() { + StubCodeMark mark(this, "StubRoutines", "verify_oop_stub"); + + address start = __ pc(); + + __ verify_oop_subroutine(); + + return start; + } + + + // + // Verify that a register contains clean 32-bits positive value + // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax). + // + // Input: + // Rint - 32-bits value + // Rtmp - scratch + // + void assert_clean_int(Register Rint, Register Rtmp) { + #if defined(ASSERT) + __ signx(Rint, Rtmp); + __ cmp(Rint, Rtmp); + __ breakpoint_trap(Assembler::notEqual, Assembler::xcc); + #endif + } + + // + // Generate overlap test for array copy stubs + // + // Input: + // O0 - array1 + // O1 - array2 + // O2 - element count + // + // Kills temps: O3, O4 + // + void array_overlap_test(address no_overlap_target, int log2_elem_size) { + assert(no_overlap_target != NULL, "must be generated"); + array_overlap_test(no_overlap_target, NULL, log2_elem_size); + } + void array_overlap_test(Label& L_no_overlap, int log2_elem_size) { + array_overlap_test(NULL, &L_no_overlap, log2_elem_size); + } + void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) { + const Register from = O0; + const Register to = O1; + const Register count = O2; + const Register to_from = O3; // to - from + const Register byte_count = O4; // count << log2_elem_size + + __ subcc(to, from, to_from); + __ sll_ptr(count, log2_elem_size, byte_count); + if (NOLp == NULL) + __ 
brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target); + else + __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp)); + __ delayed()->cmp(to_from, byte_count); + if (NOLp == NULL) + __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target); + else + __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, (*NOLp)); + __ delayed()->nop(); + } + + + // + // Generate main code for disjoint arraycopy + // + typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec, + Label& L_loop, bool use_prefetch, bool use_bis); + + void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size, + int iter_size, StubGenerator::CopyLoopFunc copy_loop_func) { + Label L_copy; + + assert(log2_elem_size <= 3, "the following code should be changed"); + int count_dec = 16>>log2_elem_size; + + int prefetch_dist = MAX2(ArraycopySrcPrefetchDistance, ArraycopyDstPrefetchDistance); + assert(prefetch_dist < 4096, "invalid value"); + prefetch_dist = (prefetch_dist + (iter_size-1)) & (-iter_size); // round up to one iteration copy size + int prefetch_count = (prefetch_dist >> log2_elem_size); // elements count + + if (UseBlockCopy) { + Label L_block_copy, L_block_copy_prefetch, L_skip_block_copy; + + // 64 bytes tail + bytes copied in one loop iteration + int tail_size = 64 + iter_size; + int block_copy_count = (MAX2(tail_size, (int)BlockCopyLowLimit)) >> log2_elem_size; + // Use BIS copy only for big arrays since it requires membar. + __ set(block_copy_count, O4); + __ cmp_and_br_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_skip_block_copy); + // This code is for disjoint source and destination: + // to <= from || to >= from+count + // but BIS will stomp over 'from' if (to > from-tail_size && to <= from) + __ sub(from, to, O4); + __ srax(O4, 4, O4); // divide by 16 since following short branch have only 5 bits for imm. 
+ __ cmp_and_br_short(O4, (tail_size>>4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy); + + __ wrasi(G0, Assembler::ASI_ST_BLKINIT_PRIMARY); + // BIS should not be used to copy tail (64 bytes+iter_size) + // to avoid zeroing of following values. + __ sub(count, (tail_size>>log2_elem_size), count); // count is still positive >= 0 + + if (prefetch_count > 0) { // rounded up to one iteration count + // Do prefetching only if copy size is bigger + // than prefetch distance. + __ set(prefetch_count, O4); + __ cmp_and_brx_short(count, O4, Assembler::less, Assembler::pt, L_block_copy); + __ sub(count, O4, count); + + (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy_prefetch, true, true); + __ set(prefetch_count, O4); + __ add(count, O4, count); + + } // prefetch_count > 0 + + (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy, false, true); + __ add(count, (tail_size>>log2_elem_size), count); // restore count + + __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT); + // BIS needs membar. + __ membar(Assembler::StoreLoad); + // Copy tail + __ ba_short(L_copy); + + __ BIND(L_skip_block_copy); + } // UseBlockCopy + + if (prefetch_count > 0) { // rounded up to one iteration count + // Do prefetching only if copy size is bigger + // than prefetch distance. 
+ __ set(prefetch_count, O4); + __ cmp_and_brx_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_copy); + __ sub(count, O4, count); + + Label L_copy_prefetch; + (this->*copy_loop_func)(from, to, count, count_dec, L_copy_prefetch, true, false); + __ set(prefetch_count, O4); + __ add(count, O4, count); + + } // prefetch_count > 0 + + (this->*copy_loop_func)(from, to, count, count_dec, L_copy, false, false); + } + + + + // + // Helper methods for copy_16_bytes_forward_with_shift() + // + void copy_16_bytes_shift_loop(Register from, Register to, Register count, int count_dec, + Label& L_loop, bool use_prefetch, bool use_bis) { + + const Register left_shift = G1; // left shift bit counter + const Register right_shift = G5; // right shift bit counter + + __ align(OptoLoopAlignment); + __ BIND(L_loop); + if (use_prefetch) { + if (ArraycopySrcPrefetchDistance > 0) { + __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); + } + if (ArraycopyDstPrefetchDistance > 0) { + __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); + } + } + __ ldx(from, 0, O4); + __ ldx(from, 8, G4); + __ inc(to, 16); + __ inc(from, 16); + __ deccc(count, count_dec); // Can we do next iteration after this one? 
+ __ srlx(O4, right_shift, G3); + __ bset(G3, O3); + __ sllx(O4, left_shift, O4); + __ srlx(G4, right_shift, G3); + __ bset(G3, O4); + if (use_bis) { + __ stxa(O3, to, -16); + __ stxa(O4, to, -8); + } else { + __ stx(O3, to, -16); + __ stx(O4, to, -8); + } + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); + __ delayed()->sllx(G4, left_shift, O3); + } + + // Copy big chunks forward with shift + // + // Inputs: + // from - source array + // to - destination array aligned to 8-bytes + // count - elements count to copy >= the count equivalent to 16 bytes + // log2_elem_size - log2 of the element size; the per-iteration decrement count_dec is derived as 16 >> log2_elem_size + // L_copy_bytes - copy exit label + // + void copy_16_bytes_forward_with_shift(Register from, Register to, + Register count, int log2_elem_size, Label& L_copy_bytes) { + Label L_aligned_copy, L_copy_last_bytes; + assert(log2_elem_size <= 3, "the following code should be changed"); + int count_dec = 16>>log2_elem_size; + + // if both arrays have the same alignment mod 8, do 8 bytes aligned copy + __ andcc(from, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); + __ delayed()->nop(); + + const Register left_shift = G1; // left shift bit counter + const Register right_shift = G5; // right shift bit counter + + __ sll(G1, LogBitsPerByte, left_shift); + __ mov(64, right_shift); + __ sub(right_shift, left_shift, right_shift); + + // + // Load 2 aligned 8-bytes chunks and use one from previous iteration + // to form 2 aligned 8-bytes chunks to store. 
+ // + __ dec(count, count_dec); // Pre-decrement 'count' + __ andn(from, 7, from); // Align address + __ ldx(from, 0, O3); + __ inc(from, 8); + __ sllx(O3, left_shift, O3); + + disjoint_copy_core(from, to, count, log2_elem_size, 16, &StubGenerator::copy_16_bytes_shift_loop); + + __ inccc(count, count_dec>>1 ); // + 8 bytes + __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); + __ delayed()->inc(count, count_dec>>1); // restore 'count' + + // copy 8 bytes, part of them already loaded in O3 + __ ldx(from, 0, O4); + __ inc(to, 8); + __ inc(from, 8); + __ srlx(O4, right_shift, G3); + __ bset(O3, G3); + __ stx(G3, to, -8); + + __ BIND(L_copy_last_bytes); + __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes + __ br(Assembler::always, false, Assembler::pt, L_copy_bytes); + __ delayed()->sub(from, right_shift, from); // restore address + + __ BIND(L_aligned_copy); + } + + // Copy big chunks backward with shift + // + // Inputs: + // end_from - source arrays end address + // end_to - destination array end address aligned to 8-bytes + // count - elements count to copy >= the count equivalent to 16 bytes + // count_dec - elements count's decrement equivalent to 16 bytes + // L_aligned_copy - aligned copy exit label + // L_copy_bytes - copy exit label + // + void copy_16_bytes_backward_with_shift(Register end_from, Register end_to, + Register count, int count_dec, + Label& L_aligned_copy, Label& L_copy_bytes) { + Label L_loop, L_copy_last_bytes; + + // if both arrays have the same alignment mod 8, do 8 bytes aligned copy + __ andcc(end_from, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); + __ delayed()->deccc(count, count_dec); // Pre-decrement 'count' + + const Register left_shift = G1; // left shift bit counter + const Register right_shift = G5; // right shift bit counter + + __ sll(G1, LogBitsPerByte, left_shift); + __ mov(64, right_shift); + __ sub(right_shift, left_shift, right_shift); + + 
// + // Load 2 aligned 8-bytes chunks and use one from previous iteration + // to form 2 aligned 8-bytes chunks to store. + // + __ andn(end_from, 7, end_from); // Align address + __ ldx(end_from, 0, O3); + __ align(OptoLoopAlignment); + __ BIND(L_loop); + __ ldx(end_from, -8, O4); + __ deccc(count, count_dec); // Can we do next iteration after this one? + __ ldx(end_from, -16, G4); + __ dec(end_to, 16); + __ dec(end_from, 16); + __ srlx(O3, right_shift, O3); + __ sllx(O4, left_shift, G3); + __ bset(G3, O3); + __ stx(O3, end_to, 8); + __ srlx(O4, right_shift, O4); + __ sllx(G4, left_shift, G3); + __ bset(G3, O4); + __ stx(O4, end_to, 0); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); + __ delayed()->mov(G4, O3); + + __ inccc(count, count_dec>>1 ); // + 8 bytes + __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); + __ delayed()->inc(count, count_dec>>1); // restore 'count' + + // copy 8 bytes, part of them already loaded in O3 + __ ldx(end_from, -8, O4); + __ dec(end_to, 8); + __ dec(end_from, 8); + __ srlx(O3, right_shift, O3); + __ sllx(O4, left_shift, G3); + __ bset(O3, G3); + __ stx(G3, end_to, 0); + + __ BIND(L_copy_last_bytes); + __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes + __ br(Assembler::always, false, Assembler::pt, L_copy_bytes); + __ delayed()->add(end_from, left_shift, end_from); // restore address + } + + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + if (UseBlockCopy) { + __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT); + __ membar(Assembler::StoreLoad); + } + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start_pc; + } + + // + // Generate stub for disjoint byte copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_skip_alignment, L_align; + Label L_copy_byte, L_copy_byte_loop, L_exit; + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register offset = O5; // offset from start of arrays + // O3, O4, G3, G4 are used as temp registers + + assert_clean_int(count, O3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + // for short arrays, just do single element copy + __ cmp(count, 23); // 16 + 7 + __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); + __ delayed()->mov(G0, offset); + + if (aligned) { + // 'aligned' == true when it is known statically during compilation + // of this arraycopy call site that both 'from' and 'to' addresses + // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). + // + // Aligned arrays have 4 bytes alignment in 32-bits VM + // and 8 bytes - in 64-bits VM. 
So we do it only for 32-bits VM + // + } else { + // copy bytes to align 'to' on 8 byte boundary + __ andcc(to, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->neg(G1); + __ inc(G1, 8); // bytes to copy to reach the next 8-byte boundary + __ sub(count, G1, count); + __ BIND(L_align); + __ ldub(from, 0, O3); + __ deccc(G1); + __ inc(from); + __ stb(O3, to, 0); + __ br(Assembler::notZero, false, Assembler::pt, L_align); + __ delayed()->inc(to); + __ BIND(L_skip_alignment); + } + if (!aligned) { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise fall through to the next + // code for aligned copy. + // The compare above (count >= 23) guarantees 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); + } + + // Both arrays are 8 bytes aligned, copy 16 bytes at a time + __ and3(count, 7, G4); // Save the tail byte count (count & 7) + __ srl(count, 3, count); + generate_disjoint_long_copy_core(aligned); + __ mov(G4, count); // count = tail bytes left for the byte loop + + // copy trailing bytes + __ BIND(L_copy_byte); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_byte_loop); + __ ldub(from, offset, O3); + __ deccc(count); + __ stb(O3, to, offset); + __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); + __ delayed()->inc(offset); + } + + __ BIND(L_exit); + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // + // Generate stub for conjoint byte copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, + address *entry, const char *name) { + // Do reverse copy. + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_skip_alignment, L_align, L_aligned_copy; + Label L_copy_byte, L_copy_byte_loop, L_exit; + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register end_from = from; // source array end address + const Register end_to = to; // destination array end address + + assert_clean_int(count, O3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + array_overlap_test(nooverlap_target, 0); + + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + __ add(to, count, end_to); // offset after last copied element + + // for short arrays, just do single element copy + __ cmp(count, 23); // 16 + 7 + __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); + __ delayed()->add(from, count, end_from); + + { + // Align end of arrays since they could be not aligned even + // when arrays itself are aligned. 
+ + // copy bytes to align 'end_to' on 8 byte boundary + __ andcc(end_to, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->nop(); + __ sub(count, G1, count); + __ BIND(L_align); + __ dec(end_from); + __ dec(end_to); + __ ldub(end_from, 0, O3); + __ deccc(G1); + __ brx(Assembler::notZero, false, Assembler::pt, L_align); + __ delayed()->stb(O3, end_to, 0); + __ BIND(L_skip_alignment); + } + if (aligned) { + // Both arrays are aligned to 8-bytes in 64-bits VM. + // The 'count' is decremented in copy_16_bytes_backward_with_shift() + // in unaligned case. + __ dec(count, 16); + } else { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise jump to the next + // code for aligned copy (and subtracting 16 from 'count' before jump). + // The compare above (count >= 11) guarantees 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, + L_aligned_copy, L_copy_byte); + } + // copy 4 elements (16 bytes) at a time + __ align(OptoLoopAlignment); + __ BIND(L_aligned_copy); + __ dec(end_from, 16); + __ ldx(end_from, 8, O3); + __ ldx(end_from, 0, O4); + __ dec(end_to, 16); + __ deccc(count, 16); + __ stx(O3, end_to, 8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); + __ delayed()->stx(O4, end_to, 0); + __ inc(count, 16); + + // copy 1 element (2 bytes) at a time + __ BIND(L_copy_byte); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_byte_loop); + __ dec(end_from); + __ dec(end_to); + __ ldub(end_from, 0, O4); + __ deccc(count); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); + __ delayed()->stb(O4, end_to, 0); + } + + __ BIND(L_exit); + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); + __ retl(); 
+ __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // + // Generate stub for disjoint short copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_skip_alignment, L_skip_alignment2; + Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit; + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register offset = O5; // offset from start of arrays + // O3, O4, G3, G4 are used as temp registers + + assert_clean_int(count, O3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + // for short arrays, just do single element copy + __ cmp(count, 11); // 8 + 3 (22 bytes) + __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); + __ delayed()->mov(G0, offset); + + if (aligned) { + // 'aligned' == true when it is known statically during compilation + // of this arraycopy call site that both 'from' and 'to' addresses + // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). + // + // Aligned arrays have 4 bytes alignment in 32-bits VM + // and 8 bytes - in 64-bits VM. 
+ // + } else { + // copy 1 element if necessary to align 'to' on an 4 bytes + __ andcc(to, 3, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->lduh(from, 0, O3); + __ inc(from, 2); + __ inc(to, 2); + __ dec(count); + __ sth(O3, to, -2); + __ BIND(L_skip_alignment); + + // copy 2 elements to align 'to' on an 8 byte boundary + __ andcc(to, 7, G0); + __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); + __ delayed()->lduh(from, 0, O3); + __ dec(count, 2); + __ lduh(from, 2, O4); + __ inc(from, 4); + __ inc(to, 4); + __ sth(O3, to, -4); + __ sth(O4, to, -2); + __ BIND(L_skip_alignment2); + } + if (!aligned) { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise fall through to the next + // code for aligned copy. + // The compare above (count >= 11) guarantees 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); + } + + // Both array are 8 bytes aligned, copy 16 bytes at a time + __ and3(count, 3, G4); // Save + __ srl(count, 2, count); + generate_disjoint_long_copy_core(aligned); + __ mov(G4, count); // restore + + // copy 1 element at a time + __ BIND(L_copy_2_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_2_bytes_loop); + __ lduh(from, offset, O3); + __ deccc(count); + __ sth(O3, to, offset); + __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); + __ delayed()->inc(offset, 2); + } + + __ BIND(L_exit); + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // + // Generate stub for disjoint short fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // to: O0 + // value: O1 + // count: O2 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register to = O0; // source array address + const Register value = O1; // fill value + const Register count = O2; // elements count + // O3 is used as a temp register + + assert_clean_int(count, O3); // Make sure 'count' is clean int. + + Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; + Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 2; + break; + case T_SHORT: + shift = 1; + break; + case T_INT: + shift = 0; + break; + default: ShouldNotReachHere(); + } + + BLOCK_COMMENT("Entry:"); + + if (t == T_BYTE) { + // Zero extend value + __ and3(value, 0xff, value); + __ sllx(value, 8, O3); + __ or3(value, O3, value); + } + if (t == T_SHORT) { + // Zero extend value + __ sllx(value, 48, value); + __ srlx(value, 48, value); + } + if (t == T_BYTE || t == T_SHORT) { + __ sllx(value, 16, O3); + __ or3(value, O3, value); + } + + __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element + __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp + __ delayed()->andcc(count, 1, G0); + + if (!aligned && (t == T_BYTE || t == T_SHORT)) { + // align source address at 4 bytes address boundary + if (t == T_BYTE) { + // One byte misalignment happens only for byte arrays + __ andcc(to, 1, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_align1); + __ delayed()->nop(); + __ stb(value, to, 0); + __ inc(to, 1); + __ dec(count, 1); + __ BIND(L_skip_align1); + } + // Two bytes misalignment happens only for byte and short (char) arrays + __ andcc(to, 2, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_align2); + __ delayed()->nop(); + __ sth(value, to, 0); + __ inc(to, 2); + __ dec(count, 1 << (shift - 1)); + __ BIND(L_skip_align2); + } + if (!aligned) { + // align to 8 bytes, we know we are 4 byte aligned to start + __ 
andcc(to, 7, G0); + __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes); + __ delayed()->nop(); + __ stw(value, to, 0); + __ inc(to, 4); + __ dec(count, 1 << shift); + __ BIND(L_fill_32_bytes); + } + + if (t == T_INT) { + // Zero extend value + __ srl(value, 0, value); + } + if (t == T_BYTE || t == T_SHORT || t == T_INT) { + __ sllx(value, 32, O3); + __ or3(value, O3, value); + } + + Label L_check_fill_8_bytes; + // Fill 32-byte chunks + __ subcc(count, 8 << shift, count); + __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes); + __ delayed()->nop(); + + Label L_fill_32_bytes_loop, L_fill_4_bytes; + __ align(16); + __ BIND(L_fill_32_bytes_loop); + + __ stx(value, to, 0); + __ stx(value, to, 8); + __ stx(value, to, 16); + __ stx(value, to, 24); + + __ subcc(count, 8 << shift, count); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop); + __ delayed()->add(to, 32, to); + + __ BIND(L_check_fill_8_bytes); + __ addcc(count, 8 << shift, count); + __ brx(Assembler::zero, false, Assembler::pn, L_exit); + __ delayed()->subcc(count, 1 << (shift + 1), count); + __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes); + __ delayed()->andcc(count, 1<<shift, G0); + + // + // length is too short, just fill 8 bytes at a time + // + Label L_fill_8_bytes_loop; + __ BIND(L_fill_8_bytes_loop); + __ stx(value, to, 0); + __ subcc(count, 1 << (shift + 1), count); + __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop); + __ delayed()->add(to, 8, to); + + // fill trailing 4 bytes + __ andcc(count, 1<<shift, G0); // in delay slot of branches + if (t == T_INT) { + __ BIND(L_fill_elements); + } + __ BIND(L_fill_4_bytes); + __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes); + if (t == T_BYTE || t == T_SHORT) { + __ delayed()->andcc(count, 1<<(shift-1), G0); + } else { + __ delayed()->nop(); + } + __ stw(value, to, 0); + if (t == T_BYTE || t == T_SHORT) { + __ inc(to, 4); + // fill trailing 2 bytes + __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches + __ BIND(L_fill_2_bytes); + __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte); + __ delayed()->andcc(count, 1, count); + __ sth(value, to, 0); + if (t == T_BYTE) { + __ inc(to, 2); + // fill trailing byte + __ andcc(count, 1, count); // in delay slot of branches + __ BIND(L_fill_byte); + __ brx(Assembler::zero, false, Assembler::pt, L_exit); + __ delayed()->nop(); + __ stb(value, to, 0); + } else { + __ BIND(L_fill_byte); + } + } else { + __ BIND(L_fill_2_bytes); + } 
+ __ BIND(L_exit); + __ retl(); + __ delayed()->nop(); + + // Handle copies less than 8 bytes. Int is handled elsewhere. + if (t == T_BYTE) { + __ BIND(L_fill_elements); + Label L_fill_2, L_fill_4; + // in delay slot __ andcc(count, 1, G0); + __ brx(Assembler::zero, false, Assembler::pt, L_fill_2); + __ delayed()->andcc(count, 2, G0); + __ stb(value, to, 0); + __ inc(to, 1); + __ BIND(L_fill_2); + __ brx(Assembler::zero, false, Assembler::pt, L_fill_4); + __ delayed()->andcc(count, 4, G0); + __ stb(value, to, 0); + __ stb(value, to, 1); + __ inc(to, 2); + __ BIND(L_fill_4); + __ brx(Assembler::zero, false, Assembler::pt, L_exit); + __ delayed()->nop(); + __ stb(value, to, 0); + __ stb(value, to, 1); + __ stb(value, to, 2); + __ retl(); + __ delayed()->stb(value, to, 3); + } + + if (t == T_SHORT) { + Label L_fill_2; + __ BIND(L_fill_elements); + // in delay slot __ andcc(count, 1, G0); + __ brx(Assembler::zero, false, Assembler::pt, L_fill_2); + __ delayed()->andcc(count, 2, G0); + __ sth(value, to, 0); + __ inc(to, 2); + __ BIND(L_fill_2); + __ brx(Assembler::zero, false, Assembler::pt, L_exit); + __ delayed()->nop(); + __ sth(value, to, 0); + __ retl(); + __ delayed()->sth(value, to, 2); + } + return start; + } + + // + // Generate stub for conjoint short copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + address generate_conjoint_short_copy(bool aligned, address nooverlap_target, + address *entry, const char *name) { + // Do reverse copy. 
+ + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_skip_alignment, L_skip_alignment2, L_aligned_copy; + Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit; + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register end_from = from; // source array end address + const Register end_to = to; // destination array end address + + const Register byte_count = O3; // bytes count to copy + + assert_clean_int(count, O3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + array_overlap_test(nooverlap_target, 1); + + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + __ sllx(count, LogBytesPerShort, byte_count); + __ add(to, byte_count, end_to); // offset after last copied element + + // for short arrays, just do single element copy + __ cmp(count, 11); // 8 + 3 (22 bytes) + __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); + __ delayed()->add(from, byte_count, end_from); + + { + // Align end of arrays since they could be not aligned even + // when arrays itself are aligned. 
+ + // copy 1 element if necessary to align 'end_to' on an 4 bytes + __ andcc(end_to, 3, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->lduh(end_from, -2, O3); + __ dec(end_from, 2); + __ dec(end_to, 2); + __ dec(count); + __ sth(O3, end_to, 0); + __ BIND(L_skip_alignment); + + // copy 2 elements to align 'end_to' on an 8 byte boundary + __ andcc(end_to, 7, G0); + __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); + __ delayed()->lduh(end_from, -2, O3); + __ dec(count, 2); + __ lduh(end_from, -4, O4); + __ dec(end_from, 4); + __ dec(end_to, 4); + __ sth(O3, end_to, 2); + __ sth(O4, end_to, 0); + __ BIND(L_skip_alignment2); + } + if (aligned) { + // Both arrays are aligned to 8-bytes in 64-bits VM. + // The 'count' is decremented in copy_16_bytes_backward_with_shift() + // in unaligned case. + __ dec(count, 8); + } else { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise jump to the next + // code for aligned copy (and subtracting 8 from 'count' before jump). + // The compare above (count >= 11) guarantees 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. 
+ + copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, + L_aligned_copy, L_copy_2_bytes); + } + // copy 4 elements (16 bytes) at a time + __ align(OptoLoopAlignment); + __ BIND(L_aligned_copy); + __ dec(end_from, 16); + __ ldx(end_from, 8, O3); + __ ldx(end_from, 0, O4); + __ dec(end_to, 16); + __ deccc(count, 8); + __ stx(O3, end_to, 8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); + __ delayed()->stx(O4, end_to, 0); + __ inc(count, 8); + + // copy 1 element (2 bytes) at a time + __ BIND(L_copy_2_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ BIND(L_copy_2_bytes_loop); + __ dec(end_from, 2); + __ dec(end_to, 2); + __ lduh(end_from, 0, O4); + __ deccc(count); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); + __ delayed()->sth(O4, end_to, 0); + } + __ BIND(L_exit); + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // + // Helper methods for generate_disjoint_int_copy_core() + // + void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec, + Label& L_loop, bool use_prefetch, bool use_bis) { + + __ align(OptoLoopAlignment); + __ BIND(L_loop); + if (use_prefetch) { + if (ArraycopySrcPrefetchDistance > 0) { + __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); + } + if (ArraycopyDstPrefetchDistance > 0) { + __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); + } + } + __ ldx(from, 4, O4); + __ ldx(from, 12, G4); + __ inc(to, 16); + __ inc(from, 16); + __ deccc(count, 4); // Can we do next iteration after this one? 
+ + __ srlx(O4, 32, G3); + __ bset(G3, O3); + __ sllx(O4, 32, O4); + __ srlx(G4, 32, G3); + __ bset(G3, O4); + if (use_bis) { + __ stxa(O3, to, -16); + __ stxa(O4, to, -8); + } else { + __ stx(O3, to, -16); + __ stx(O4, to, -8); + } + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); + __ delayed()->sllx(G4, 32, O3); + + } + + // + // Generate core code for disjoint int copy (and oop copy on 32-bit). + // If "aligned" is true, the "from" and "to" addresses are assumed + // to be heapword aligned. + // + // Arguments: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + void generate_disjoint_int_copy_core(bool aligned) { + + Label L_skip_alignment, L_aligned_copy; + Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register offset = O5; // offset from start of arrays + // O3, O4, G3, G4 are used as temp registers + + // 'aligned' == true when it is known statically during compilation + // of this arraycopy call site that both 'from' and 'to' addresses + // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). + // + // Aligned arrays have 4 bytes alignment in 32-bits VM + // and 8 bytes - in 64-bits VM. + // + if (!aligned) { + // The next check could be put under 'ifndef' since the code in + // generate_disjoint_long_copy_core() has own checks and set 'offset'. 
+ + // for short arrays, just do single element copy + __ cmp(count, 5); // 4 + 1 (20 bytes) + __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); + __ delayed()->mov(G0, offset); + + // copy 1 element to align 'to' on an 8 byte boundary + __ andcc(to, 7, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->ld(from, 0, O3); + __ inc(from, 4); + __ inc(to, 4); + __ dec(count); + __ st(O3, to, -4); + __ BIND(L_skip_alignment); + + // if arrays have same alignment mod 8, do 4 elements copy + __ andcc(from, 7, G0); + __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); + __ delayed()->ld(from, 0, O3); + + // + // Load 2 aligned 8-bytes chunks and use one from previous iteration + // to form 2 aligned 8-bytes chunks to store. + // + // copy_16_bytes_forward_with_shift() is not used here since this + // code is more optimal. + + // copy with shift 4 elements (16 bytes) at a time + __ dec(count, 4); // The cmp at the beginning guaranty count >= 4 + __ sllx(O3, 32, O3); + + disjoint_copy_core(from, to, count, 2, 16, &StubGenerator::copy_16_bytes_loop); + + __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); + __ delayed()->inc(count, 4); // restore 'count' + + __ BIND(L_aligned_copy); + } // !aligned + + // copy 4 elements (16 bytes) at a time + __ and3(count, 1, G4); // Save + __ srl(count, 1, count); + generate_disjoint_long_copy_core(aligned); + __ mov(G4, count); // Restore + + // copy 1 element at a time + __ BIND(L_copy_4_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ BIND(L_copy_4_bytes_loop); + __ ld(from, offset, O3); + __ deccc(count); + __ st(O3, to, offset); + __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop); + __ delayed()->inc(offset, 4); + __ BIND(L_exit); + } + + // + // Generate stub for disjoint int copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register count = O2; + assert_clean_int(count, O3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_disjoint_int_copy_core(aligned); + } + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // + // Generate core code for conjoint int copy (and oop copy on 32-bit). + // If "aligned" is true, the "from" and "to" addresses are assumed + // to be heapword aligned. + // + // Arguments: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + void generate_conjoint_int_copy_core(bool aligned) { + // Do reverse copy. 
+ + Label L_skip_alignment, L_aligned_copy; + Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register end_from = from; // source array end address + const Register end_to = to; // destination array end address + // O3, O4, O5, G3 are used as temp registers + + const Register byte_count = O3; // bytes count to copy + + __ sllx(count, LogBytesPerInt, byte_count); + __ add(to, byte_count, end_to); // offset after last copied element + + __ cmp(count, 5); // for short arrays, just do single element copy + __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); + __ delayed()->add(from, byte_count, end_from); + + // copy 1 element to align 'to' on an 8 byte boundary + __ andcc(end_to, 7, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->nop(); + __ dec(count); + __ dec(end_from, 4); + __ dec(end_to, 4); + __ ld(end_from, 0, O4); + __ st(O4, end_to, 0); + __ BIND(L_skip_alignment); + + // Check if 'end_from' and 'end_to' has the same alignment. + __ andcc(end_from, 7, G0); + __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); + __ delayed()->dec(count, 4); // The cmp at the start guaranty cnt >= 4 + + // copy with shift 4 elements (16 bytes) at a time + // + // Load 2 aligned 8-bytes chunks and use one from previous iteration + // to form 2 aligned 8-bytes chunks to store. 
+ // + __ ldx(end_from, -4, O3); + __ align(OptoLoopAlignment); + __ BIND(L_copy_16_bytes); + __ ldx(end_from, -12, O4); + __ deccc(count, 4); + __ ldx(end_from, -20, O5); + __ dec(end_to, 16); + __ dec(end_from, 16); + __ srlx(O3, 32, O3); + __ sllx(O4, 32, G3); + __ bset(G3, O3); + __ stx(O3, end_to, 8); + __ srlx(O4, 32, O4); + __ sllx(O5, 32, G3); + __ bset(O4, G3); + __ stx(G3, end_to, 0); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); + __ delayed()->mov(O5, O3); + + __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); + __ delayed()->inc(count, 4); + + // copy 4 elements (16 bytes) at a time + __ align(OptoLoopAlignment); + __ BIND(L_aligned_copy); + __ dec(end_from, 16); + __ ldx(end_from, 8, O3); + __ ldx(end_from, 0, O4); + __ dec(end_to, 16); + __ deccc(count, 4); + __ stx(O3, end_to, 8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); + __ delayed()->stx(O4, end_to, 0); + __ inc(count, 4); + + // copy 1 element (4 bytes) at a time + __ BIND(L_copy_4_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ BIND(L_copy_4_bytes_loop); + __ dec(end_from, 4); + __ dec(end_to, 4); + __ ld(end_from, 0, O4); + __ deccc(count); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop); + __ delayed()->st(O4, end_to, 0); + __ BIND(L_exit); + } + + // + // Generate stub for conjoint int copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + address generate_conjoint_int_copy(bool aligned, address nooverlap_target, + address *entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + assert_clean_int(O2, O3); // Make sure 'count' is clean int. 
+ + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + array_overlap_test(nooverlap_target, 2); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_conjoint_int_copy_core(aligned); + } + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // + // Helper methods for generate_disjoint_long_copy_core() + // + void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec, + Label& L_loop, bool use_prefetch, bool use_bis) { + __ align(OptoLoopAlignment); + __ BIND(L_loop); + for (int off = 0; off < 64; off += 16) { + if (use_prefetch && (off & 31) == 0) { + if (ArraycopySrcPrefetchDistance > 0) { + __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads); + } + if (ArraycopyDstPrefetchDistance > 0) { + __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads); + } + } + __ ldx(from, off+0, O4); + __ ldx(from, off+8, O5); + if (use_bis) { + __ stxa(O4, to, off+0); + __ stxa(O5, to, off+8); + } else { + __ stx(O4, to, off+0); + __ stx(O5, to, off+8); + } + } + __ deccc(count, 8); + __ inc(from, 64); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); + __ delayed()->inc(to, 64); + } + + // + // Generate core code for disjoint long copy (and oop copy on 64-bit). + // "aligned" is ignored, because we must make the stronger + // assumption that both addresses are always 64-bit aligned. 
+ // + // Arguments: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + // count -= 2; + // if ( count >= 0 ) { // >= 2 elements + // if ( count > 6) { // >= 8 elements + // count -= 6; // original count - 8 + // do { + // copy_8_elements; + // count -= 8; + // } while ( count >= 0 ); + // count += 6; + // } + // if ( count >= 0 ) { // >= 2 elements + // do { + // copy_2_elements; + // } while ( (count=count-2) >= 0 ); + // } + // } + // count += 2; + // if ( count != 0 ) { // 1 element left + // copy_1_element; + // } + // + void generate_disjoint_long_copy_core(bool aligned) { + Label L_copy_8_bytes, L_copy_16_bytes, L_exit; + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register offset0 = O4; // element offset + const Register offset8 = O5; // next element offset + + __ deccc(count, 2); + __ mov(G0, offset0); // offset from start of arrays (0) + __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); + __ delayed()->add(offset0, 8, offset8); + + // Copy by 64 bytes chunks + + const Register from64 = O3; // source address + const Register to64 = G3; // destination address + __ subcc(count, 6, O3); + __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); + __ delayed()->mov(to, to64); + // Now we can use O4(offset0), O5(offset8) as temps + __ mov(O3, count); + // count >= 0 (original count - 8) + __ mov(from, from64); + + disjoint_copy_core(from64, to64, count, 3, 64, &StubGenerator::copy_64_bytes_loop); + + // Restore O4(offset0), O5(offset8) + __ sub(from64, from, offset0); + __ inccc(count, 6); // restore count + __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); + __ delayed()->add(offset0, 8, offset8); + + // Copy by 16 bytes chunks + __ align(OptoLoopAlignment); + __ BIND(L_copy_16_bytes); + __ ldx(from, offset0, O3); + __ ldx(from, offset8, G3); + __ deccc(count, 2); + __ stx(O3, to, 
offset0); + __ inc(offset0, 16); + __ stx(G3, to, offset8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); + __ delayed()->inc(offset8, 16); + + // Copy last 8 bytes + __ BIND(L_copy_8_bytes); + __ inccc(count, 2); + __ brx(Assembler::zero, true, Assembler::pn, L_exit ); + __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs + __ ldx(from, offset0, O3); + __ stx(O3, to, offset0); + __ BIND(L_exit); + } + + // + // Generate stub for disjoint long copy. + // "aligned" is ignored, because we must make the stronger + // assumption that both addresses are always 64-bit aligned. + // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + assert_clean_int(O2, O3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, true, false); + generate_disjoint_long_copy_core(aligned); + } + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // + // Generate core code for conjoint long copy (and oop copy on 64-bit). + // "aligned" is ignored, because we must make the stronger + // assumption that both addresses are always 64-bit aligned. + // + // Arguments: + // from: O0 + // to: O1 + // count: O2 treated as signed + // + void generate_conjoint_long_copy_core(bool aligned) { + // Do reverse copy. 
+ Label L_copy_8_bytes, L_copy_16_bytes, L_exit; + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + const Register offset8 = O4; // element offset + const Register offset0 = O5; // previous element offset + + __ subcc(count, 1, count); + __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes ); + __ delayed()->sllx(count, LogBytesPerLong, offset8); + __ sub(offset8, 8, offset0); + __ align(OptoLoopAlignment); + __ BIND(L_copy_16_bytes); + __ ldx(from, offset8, O2); + __ ldx(from, offset0, O3); + __ stx(O2, to, offset8); + __ deccc(offset8, 16); // use offset8 as counter + __ stx(O3, to, offset0); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes); + __ delayed()->dec(offset0, 16); + + __ BIND(L_copy_8_bytes); + __ brx(Assembler::negative, false, Assembler::pn, L_exit ); + __ delayed()->nop(); + __ ldx(from, 0, O3); + __ stx(O3, to, 0); + __ BIND(L_exit); + } + + // Generate stub for conjoint long copy. + // "aligned" is ignored, because we must make the stronger + // assumption that both addresses are always 64-bit aligned. + // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // ret: O0 always zero + // + address generate_conjoint_long_copy(bool aligned, address nooverlap_target, + address *entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + assert(aligned, "Should always be aligned"); + + assert_clean_int(O2, O3); // Make sure 'count' is clean int. 
+ + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + array_overlap_test(nooverlap_target, 3); // NOTE(review): 3 is presumably log2 of the 8-byte element size (the oop stubs pass LogBytesPerHeapOop here) -- confirm + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, true, false); + generate_conjoint_long_copy_core(aligned); + } + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // Generate stub for disjoint oop copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // "aligned" and "dest_uninitialized" are also folded into the decorator set + // handed to the BarrierSetAssembler arraycopy prologue and epilogue. + // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // ret: O0 always zero + // + address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name, + bool dest_uninitialized = false) { + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + assert_clean_int(count, O3); // Make sure 'count' is clean int. 
+ if (UseCompressedOops) { + generate_disjoint_int_copy_core(aligned); + } else { + generate_disjoint_long_copy_core(aligned); + } + + bs->arraycopy_epilogue(_masm, decorators, T_OBJECT, from, to, count); + + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + // Generate stub for conjoint oop copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // "aligned" and "dest_uninitialized" are also folded into the decorator set + // handed to the BarrierSetAssembler arraycopy prologue and epilogue. + // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // ret: O0 always zero + // + address generate_conjoint_oop_copy(bool aligned, address nooverlap_target, + address *entry, const char *name, + bool dest_uninitialized = false) { + + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + assert_clean_int(count, O3); // Make sure 'count' is clean int. 
+ + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here + BLOCK_COMMENT("Entry:"); + } + + array_overlap_test(nooverlap_target, LogBytesPerHeapOop); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, T_OBJECT, from, to, count); + + if (UseCompressedOops) { + generate_conjoint_int_copy_core(aligned); + } else { + generate_conjoint_long_copy_core(aligned); + } + + bs->arraycopy_epilogue(_masm, decorators, T_OBJECT, from, to, count); + + // O3, O4 are used as temp registers + inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start; + } + + + // Helper for generating a dynamic type check. + // Smashes only the given temp registers. + // The emitted code branches to L_success when the check passes and + // falls through past the end of this helper when it fails. + void generate_type_check(Register sub_klass, + Register super_check_offset, + Register super_klass, + Register temp, + Label& L_success) { + assert_different_registers(sub_klass, super_check_offset, super_klass, temp); + + BLOCK_COMMENT("type_check:"); + + Label L_miss, L_pop_to_miss; + + assert_clean_int(super_check_offset, temp); + + __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg, + &L_success, &L_miss, NULL, + super_check_offset); + + BLOCK_COMMENT("type_check_slow_path:"); + // A frame is saved around the slow path: it is handed L0, L1, L2, L4 as temps, + // the klass registers are re-addressed via after_save(), and both exits restore the frame. + __ save_frame(0); + __ check_klass_subtype_slow_path(sub_klass->after_save(), + super_klass->after_save(), + L0, L1, L2, L4, + NULL, &L_pop_to_miss); + __ ba(L_success); + __ delayed()->restore(); + + __ bind(L_pop_to_miss); + __ restore(); + + // Fall through on failure! + __ BIND(L_miss); + } + + + // Generate stub for checked oop copy. 
+ // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 treated as signed + // ckoff: O3 (super_check_offset) + // ckval: O4 (super_klass) + // ret: O0 zero for success; (-1^K) where K is partial transfer count + // + address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) { + + const Register O0_from = O0; // source array address + const Register O1_to = O1; // destination array address + const Register O2_count = O2; // elements count + const Register O3_ckoff = O3; // super_check_offset + const Register O4_ckval = O4; // super_klass + + const Register O5_offset = O5; // loop var, with stride heapOopSize + const Register G1_remain = G1; // loop var, with stride -1 + const Register G3_oop = G3; // actual oop copied + const Register G4_klass = G4; // oop._klass + const Register G5_super = G5; // oop._klass._primary_supers[ckval] + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + +#ifdef ASSERT + // We sometimes save a frame (see generate_type_check above). + // If this will cause trouble, let's fail now instead of later. + __ save_frame(0); + __ restore(); +#endif + + assert_clean_int(O2_count, G1); // Make sure 'count' is clean int. 
+ +#ifdef ASSERT + // caller guarantees that the arrays really are different + // otherwise, we would have to make conjoint checks + { Label L; + __ mov(O3, G1); // spill: overlap test smashes O3 + __ mov(O4, G4); // spill: overlap test smashes O4 + array_overlap_test(L, LogBytesPerHeapOop); + __ stop("checkcast_copy within a single array"); + __ bind(L); + __ mov(G1, O3); + __ mov(G4, O4); + } +#endif //ASSERT + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from generic stub) + BLOCK_COMMENT("Entry:"); + } + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, T_OBJECT, O0_from, O1_to, O2_count); + + Label load_element, store_element, do_epilogue, fail, done; + __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it + __ brx(Assembler::notZero, false, Assembler::pt, load_element); + __ delayed()->mov(G0, O5_offset); // offset from start of arrays + + // Empty array: Nothing to do. + inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->set(0, O0); // return 0 on (trivial) success + + // ======== begin loop ======== + // (Loop is rotated; its entry is load_element.) 
+ // Loop variables: + // (O5 = 0; ; O5 += heapOopSize) --- offset from src, dest arrays + // (G1 = len; G1 != 0; G1--) --- number of oops *remaining* (O2 keeps the total) + // G3, G4, G5 --- current oop, oop.klass, oop.klass.super + __ align(OptoLoopAlignment); + + __ BIND(store_element); + __ deccc(G1_remain); // decrement the count + __ store_heap_oop(G3_oop, O1_to, O5_offset, noreg, AS_RAW); // store the oop + __ inc(O5_offset, heapOopSize); // step to next offset + __ brx(Assembler::zero, true, Assembler::pt, do_epilogue); + __ delayed()->set(0, O0); // return 0 on success + + // ======== loop entry is here ======== + __ BIND(load_element); + __ load_heap_oop(O0_from, O5_offset, G3_oop, noreg, AS_RAW); // load the oop + __ br_null_short(G3_oop, Assembler::pt, store_element); + + __ load_klass(G3_oop, G4_klass); // query the object klass + + generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, + // branch to this on success: + store_element); + // ======== end loop ======== + + // It was a real error; we must depend on the caller to finish the job. + // Register G1 has number of *remaining* oops, O2 number of *total* oops. + // Emit GC store barriers for the oops we have copied (O2 minus G1), + // and report their number to the caller. + __ BIND(fail); + __ subcc(O2_count, G1_remain, O2_count); + __ brx(Assembler::zero, false, Assembler::pt, done); + __ delayed()->not1(O2_count, O0); // report (-1^K) to caller + + __ BIND(do_epilogue); + bs->arraycopy_epilogue(_masm, decorators, T_OBJECT, O0_from, O1_to, O2_count); + + __ BIND(done); + inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); + __ retl(); + __ delayed()->nop(); // return value in O0 + + return start; + } + + + // Generate 'unsafe' array copy stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t argument instead of an element count. 
+ // + // Arguments for generated stub: + // from: O0 + // to: O1 + // count: O2 byte count, treated as ssize_t, can be zero + // + // Examines the alignment of the operands and dispatches + // to a long, int, short, or byte copy loop. + // + address generate_unsafe_copy(const char* name, + address byte_copy_entry, + address short_copy_entry, + address int_copy_entry, + address long_copy_entry) { + + const Register O0_from = O0; // source array address + const Register O1_to = O1; // destination array address + const Register O2_count = O2; // byte count on entry; scaled to an element count in the branch delay slots below + + const Register G1_bits = G1; // test copy of low bits + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3); + + // OR both addresses and the byte count together so that a single + // mask test per element size covers the alignment of all three. + __ or3(O0_from, O1_to, G1_bits); + __ or3(O2_count, G1_bits, G1_bits); + + __ btst(BytesPerLong-1, G1_bits); + __ br(Assembler::zero, true, Assembler::pt, + long_copy_entry, relocInfo::runtime_call_type); + // scale the count on the way out: + __ delayed()->srax(O2_count, LogBytesPerLong, O2_count); + + __ btst(BytesPerInt-1, G1_bits); + __ br(Assembler::zero, true, Assembler::pt, + int_copy_entry, relocInfo::runtime_call_type); + // scale the count on the way out: + __ delayed()->srax(O2_count, LogBytesPerInt, O2_count); + + __ btst(BytesPerShort-1, G1_bits); + __ br(Assembler::zero, true, Assembler::pt, + short_copy_entry, relocInfo::runtime_call_type); + // scale the count on the way out: + __ delayed()->srax(O2_count, LogBytesPerShort, O2_count); + + __ br(Assembler::always, false, Assembler::pt, + byte_copy_entry, relocInfo::runtime_call_type); + __ delayed()->nop(); + + return start; + } + + + // Perform range checks on the proposed arraycopy. + // Kills the two temps, but nothing else. + // Also, clean the sign bits of src_pos and dst_pos. 
+ void arraycopy_range_checks(Register src, // source array oop (O0) + Register src_pos, // source position (O1) + Register dst, // destination array oop (O2) + Register dst_pos, // destination position (O3) + Register length, // length of copy (O4) + Register temp1, Register temp2, + Label& L_failed) { + BLOCK_COMMENT("arraycopy_range_checks:"); + + // if (src_pos + length > arrayOop(src)->length() ) FAIL; + + const Register array_length = temp1; // scratch + const Register end_pos = temp2; // scratch + + // Note: This next instruction may be in the delay slot of a branch: + __ add(length, src_pos, end_pos); // src_pos + length + __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length); + __ cmp(end_pos, array_length); + __ br(Assembler::greater, false, Assembler::pn, L_failed); + + // if (dst_pos + length > arrayOop(dst)->length() ) FAIL; + __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length + __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length); + __ cmp(end_pos, array_length); + __ br(Assembler::greater, false, Assembler::pn, L_failed); + + // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. + // Move with sign extension can be used since they are positive. 
+ __ delayed()->signx(src_pos, src_pos); + __ signx(dst_pos, dst_pos); + + BLOCK_COMMENT("arraycopy_range_checks done"); + } + + + // + // Generate generic array copy stubs + // + // Input: + // O0 - src oop + // O1 - src_pos + // O2 - dst oop + // O3 - dst_pos + // O4 - element count + // + // Output: + // O0 == 0 - success + // O0 == -1 - need to call System.arraycopy + // + address generate_generic_copy(const char *name, + address entry_jbyte_arraycopy, + address entry_jshort_arraycopy, + address entry_jint_arraycopy, + address entry_oop_arraycopy, + address entry_jlong_arraycopy, + address entry_checkcast_arraycopy) { + Label L_failed, L_objArray; + + // Input registers + const Register src = O0; // source array oop + const Register src_pos = O1; // source position + const Register dst = O2; // destination array oop + const Register dst_pos = O3; // destination position + const Register length = O4; // elements count + + // registers used as temp + const Register G3_src_klass = G3; // source array klass + const Register G4_dst_klass = G4; // destination array klass + const Register G5_lh = G5; // layout helper + const Register O5_temp = O5; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3); + + // In principle, the int arguments could be dirty. + //assert_clean_int(src_pos, G1); + //assert_clean_int(dst_pos, G1); + //assert_clean_int(length, G1); + + //----------------------------------------------------------------------- + // Assembler stubs will be used for this call to arraycopy + // if the following conditions are met: + // + // (1) src and dst must not be null. + // (2) src_pos must not be negative. + // (3) dst_pos must not be negative. + // (4) length must not be negative. + // (5) src klass and dst klass should be the same and not NULL. + // (6) src and dst should be arrays. 
+ // (7) src_pos + length must not exceed length of src. + // (8) dst_pos + length must not exceed length of dst. + BLOCK_COMMENT("arraycopy initial argument checks"); + + // if (src == NULL) return -1; + __ br_null(src, false, Assembler::pn, L_failed); + + // if (src_pos < 0) return -1; + __ delayed()->tst(src_pos); + __ br(Assembler::negative, false, Assembler::pn, L_failed); + __ delayed()->nop(); + + // if (dst == NULL) return -1; + __ br_null(dst, false, Assembler::pn, L_failed); + + // if (dst_pos < 0) return -1; + __ delayed()->tst(dst_pos); + __ br(Assembler::negative, false, Assembler::pn, L_failed); + + // if (length < 0) return -1; + __ delayed()->tst(length); + __ br(Assembler::negative, false, Assembler::pn, L_failed); + + BLOCK_COMMENT("arraycopy argument klass checks"); + // get src->klass() + if (UseCompressedClassPointers) { + __ delayed()->nop(); // ??? not good + __ load_klass(src, G3_src_klass); + } else { + __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass); + } + +#ifdef ASSERT + // assert(src->klass() != NULL); + BLOCK_COMMENT("assert klasses not null"); + { Label L_a, L_b; + __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL + __ bind(L_a); + __ stop("broken null klass"); + __ bind(L_b); + __ load_klass(dst, G4_dst_klass); + __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also + __ delayed()->mov(G0, G4_dst_klass); // scribble the temp + BLOCK_COMMENT("assert done"); + } +#endif + + // Load layout helper + // + // |array_tag| | header_size | element_type | |log2_element_size| + // 32 30 24 16 8 2 0 + // + // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 + // + + int lh_offset = in_bytes(Klass::layout_helper_offset()); + + // Load 32-bits signed value. Use br() instruction with it to check icc. 
+ __ lduw(G3_src_klass, lh_offset, G5_lh); + + if (UseCompressedClassPointers) { + __ load_klass(dst, G4_dst_klass); + } + // Handle objArrays completely differently... + juint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ set(objArray_lh, O5_temp); + __ cmp(G5_lh, O5_temp); + __ br(Assembler::equal, false, Assembler::pt, L_objArray); + if (UseCompressedClassPointers) { + __ delayed()->nop(); + } else { + __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass); + } + + // if (src->klass() != dst->klass()) return -1; + __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed); + + // if (!src->is_Array()) return -1; + __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0 + __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed); + + // At this point, it is known to be a typeArray (array_tag 0x3). +#ifdef ASSERT + __ delayed()->nop(); + { Label L; + jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); + __ set(lh_prim_tag_in_place, O5_temp); + __ cmp(G5_lh, O5_temp); + __ br(Assembler::greaterEqual, false, Assembler::pt, L); + __ delayed()->nop(); + __ stop("must be a primitive array"); + __ bind(L); + } +#else + __ delayed(); // match next insn to prev branch +#endif + + arraycopy_range_checks(src, src_pos, dst, dst_pos, length, + O5_temp, G4_dst_klass, L_failed); + + // TypeArrayKlass + // + // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); + // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); + // + + const Register G4_offset = G4_dst_klass; // array offset + const Register G3_elsize = G3_src_klass; // log2 element size + + __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset); + __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset + __ add(src, G4_offset, src); // src array offset + __ add(dst, G4_offset, dst); // dst array offset + __ and3(G5_lh, Klass::_lh_log2_element_size_mask, 
G3_elsize); // log2 element size + + // next registers should be set before the jump to corresponding stub + const Register from = O0; // source array address + const Register to = O1; // destination array address + const Register count = O2; // elements count + + // 'from', 'to', 'count' registers should be set in this order + // since they are the same as 'src', 'src_pos', 'dst'. + + BLOCK_COMMENT("scale indexes to element size"); + __ sll_ptr(src_pos, G3_elsize, src_pos); + __ sll_ptr(dst_pos, G3_elsize, dst_pos); + __ add(src, src_pos, from); // src_addr + __ add(dst, dst_pos, to); // dst_addr + + BLOCK_COMMENT("choose copy loop based on element size"); + __ cmp(G3_elsize, 0); + __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy); + __ delayed()->signx(length, count); // length + + __ cmp(G3_elsize, LogBytesPerShort); + __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy); + __ delayed()->signx(length, count); // length + + __ cmp(G3_elsize, LogBytesPerInt); + __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy); + __ delayed()->signx(length, count); // length +#ifdef ASSERT + { Label L; + __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L); + __ stop("must be long copy, but elsize is wrong"); + __ bind(L); + } +#endif + __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy); + __ delayed()->signx(length, count); // length + + // ObjArrayKlass + __ BIND(L_objArray); + // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length + + Label L_plain_copy, L_checkcast_copy; + // test array classes for subtyping + __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality + __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy); + __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below + + // Identically typed arrays can be copied without element-wise checks. 
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, length, + O5_temp, G5_lh, L_failed); + + __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset + __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset + __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos); + __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos); + __ add(src, src_pos, from); // src_addr + __ add(dst, dst_pos, to); // dst_addr + __ BIND(L_plain_copy); + __ br(Assembler::always, false, Assembler::pt, entry_oop_arraycopy); + __ delayed()->signx(length, count); // length + + __ BIND(L_checkcast_copy); + // live at this point: G3_src_klass, G4_dst_klass + { + // Before looking at dst.length, make sure dst is also an objArray. + // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot + __ cmp(G5_lh, O5_temp); + __ br(Assembler::notEqual, false, Assembler::pn, L_failed); + + // It is safe to examine both src.length and dst.length. + __ delayed(); // match next insn to prev branch + arraycopy_range_checks(src, src_pos, dst, dst_pos, length, + O5_temp, G5_lh, L_failed); + + // Marshal the base address arguments now, freeing registers. + __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset + __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset + __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos); + __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos); + __ add(src, src_pos, from); // src_addr + __ add(dst, dst_pos, to); // dst_addr + __ signx(length, count); // length (reloaded) + + Register sco_temp = O3; // this register is free now + assert_different_registers(from, to, count, sco_temp, + G4_dst_klass, G3_src_klass); + + // Generate the type check. + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ lduw(G4_dst_klass, sco_offset, sco_temp); + generate_type_check(G3_src_klass, sco_temp, G4_dst_klass, + O5_temp, L_plain_copy); + + // Fetch destination element klass from the ObjArrayKlass header. 
+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); + + // the checkcast_copy loop needs two extra arguments: + __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass + // lduw(O4, sco_offset, O3); // sco of elem klass + + __ br(Assembler::always, false, Assembler::pt, entry_checkcast_arraycopy); + __ delayed()->lduw(O4, sco_offset, O3); + } + + __ BIND(L_failed); + __ retl(); + __ delayed()->sub(G0, 1, O0); // return -1 + return start; + } + + // Generates the arraycopy and fill stubs and records their addresses in the + // corresponding StubRoutines fields. The secondary entry points captured in the + // entry_* locals are handed on to the unsafe and generic copy stubs generated last. + void generate_arraycopy_stubs() { + address entry; + address entry_jbyte_arraycopy; + address entry_jshort_arraycopy; + address entry_jint_arraycopy; + address entry_oop_arraycopy; + address entry_jlong_arraycopy; + address entry_checkcast_arraycopy; + + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit); + + //*** jbyte + // Always need aligned and unaligned versions + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, + "jbyte_disjoint_arraycopy"); + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, + &entry_jbyte_arraycopy, + "jbyte_arraycopy"); + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, + "arrayof_jbyte_disjoint_arraycopy"); + StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, + "arrayof_jbyte_arraycopy"); + + //*** jshort + // Always need aligned and unaligned versions + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, + "jshort_disjoint_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, + &entry_jshort_arraycopy, + "jshort_arraycopy"); + StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, + "arrayof_jshort_disjoint_arraycopy"); + StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, + "arrayof_jshort_arraycopy"); + 
+ //*** jint + // Aligned versions + StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, + "arrayof_jint_disjoint_arraycopy"); + StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, + "arrayof_jint_arraycopy"); + // In 64 bit we need both aligned and unaligned versions of jint arraycopy. + // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it). + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, + "jint_disjoint_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, + &entry_jint_arraycopy, + "jint_arraycopy"); + + //*** jlong + // It is always aligned + StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, + "arrayof_jlong_disjoint_arraycopy"); + StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, + "arrayof_jlong_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; + StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; + + + //*** oops + // Aligned versions + StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry, + "arrayof_oop_disjoint_arraycopy"); + StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy, + "arrayof_oop_arraycopy"); + // Aligned versions without pre-barriers + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry, + "arrayof_oop_disjoint_arraycopy_uninit", + /*dest_uninitialized*/true); + StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL, + "arrayof_oop_arraycopy_uninit", + /*dest_uninitialized*/true); + if (UseCompressedOops) { + // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy. 
+ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy, + "oop_arraycopy"); + // Unaligned versions without pre-barriers + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry, + "oop_disjoint_arraycopy_uninit", + /*dest_uninitialized*/true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL, + "oop_arraycopy_uninit", + /*dest_uninitialized*/true); + } else { + // oop arraycopy is always aligned on 32bit and 64bit without compressed oops + StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; + StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; + StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; + StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; + } + + StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); + StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, + /*dest_uninitialized*/true); + + StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_jlong_arraycopy); + StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_oop_arraycopy, + entry_jlong_arraycopy, + entry_checkcast_arraycopy); + + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + 
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + address generate_aescrypt_encryptBlock() { + // required since we read expanded key 'int' array starting first element without alignment considerations + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, + "the following code assumes that first element of an int array is aligned to 8 bytes"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + Label L_load_misaligned_input, L_load_expanded_key, L_doLast128bit, L_storeOutput, L_store_misaligned_output; + address start = __ pc(); + Register from = O0; // source byte array + Register to = O1; // destination byte array + Register key = O2; // expanded key array + const Register keylen = O4; //reg for storing expanded key array length + + // read expanded key length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // Method to address arbitrary alignment for load instructions: + // Check last 3 bits of 'from' address to see if it is aligned to 8-byte boundary + // If zero/aligned then continue with double FP load instructions + // If not zero/mis-aligned then alignaddr will set GSR.align with number of bytes to skip during faligndata + // alignaddr will also convert arbitrary aligned 'from' address to nearest 8-byte aligned address + // load 3 * 8-byte components (to read 16 bytes input) in 3 different FP regs starting at this aligned address + // faligndata will then extract (based on GSR.align value) the appropriate 8 bytes from the 2 source regs + + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input); + __ delayed()->alignaddr(from, G0, from); + 
+ // aligned case: load input into F54-F56 + __ ldf(FloatRegisterImpl::D, from, 0, F54); + __ ldf(FloatRegisterImpl::D, from, 8, F56); + __ ba_short(L_load_expanded_key); + + __ BIND(L_load_misaligned_input); + __ ldf(FloatRegisterImpl::D, from, 0, F54); + __ ldf(FloatRegisterImpl::D, from, 8, F56); + __ ldf(FloatRegisterImpl::D, from, 16, F58); + __ faligndata(F54, F56, F54); + __ faligndata(F56, F58, F56); + + __ BIND(L_load_expanded_key); + // Since we load expanded key buffers starting first element, 8-byte alignment is guaranteed + for ( int i = 0; i <= 38; i += 2 ) { + __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i)); + } + + // perform cipher transformation + __ fxor(FloatRegisterImpl::D, F0, F54, F54); + __ fxor(FloatRegisterImpl::D, F2, F56, F56); + // rounds 1 through 8 + for ( int i = 4; i <= 28; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F54, F56, F58); + __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60); + __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54); + __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56); + } + __ aes_eround01(F36, F54, F56, F58); //round 9 + __ aes_eround23(F38, F54, F56, F60); + + // 128-bit original key size + __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit); + + for ( int i = 40; i <= 50; i += 2 ) { + __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) ); + } + __ aes_eround01(F40, F58, F60, F54); //round 10 + __ aes_eround23(F42, F58, F60, F56); + __ aes_eround01(F44, F54, F56, F58); //round 11 + __ aes_eround23(F46, F54, F56, F60); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput); + + __ ldf(FloatRegisterImpl::D, key, 208, F52); + __ aes_eround01(F48, F58, F60, F54); //round 12 + __ aes_eround23(F50, F58, F60, F56); + __ ldf(FloatRegisterImpl::D, key, 216, F46); + __ ldf(FloatRegisterImpl::D, key, 224, F48); + __ ldf(FloatRegisterImpl::D, key, 232, F50); + __ aes_eround01(F52, F54, F56, F58); 
//round 13 + __ aes_eround23(F46, F54, F56, F60); + __ ba_short(L_storeOutput); + + __ BIND(L_doLast128bit); + __ ldf(FloatRegisterImpl::D, key, 160, F48); + __ ldf(FloatRegisterImpl::D, key, 168, F50); + + __ BIND(L_storeOutput); + // perform last round of encryption common for all key sizes + __ aes_eround01_l(F48, F58, F60, F54); //last round + __ aes_eround23_l(F50, F58, F60, F56); + + // Method to address arbitrary alignment for store instructions: + // Check last 3 bits of 'dest' address to see if it is aligned to 8-byte boundary + // If zero/aligned then continue with double FP store instructions + // If not zero/mis-aligned then edge8n will generate edge mask in result reg (O3 in below case) + // Example: If dest address is 0x07 and nearest 8-byte aligned address is 0x00 then edge mask will be 00000001 + // Compute (8-n) where n is # of bytes skipped by partial store(stpartialf) inst from edge mask, n=7 in this case + // We get the value of n from the andcc that checks 'dest' alignment. n is available in O5 in below case. 
+ // Set GSR.align to (8-n) using alignaddr + // Circular byte shift store values by n places so that the original bytes are at correct position for stpartialf + // Set the arbitrarily aligned 'dest' address to nearest 8-byte aligned address + // Store (partial) the original first (8-n) bytes starting at the original 'dest' address + // Negate the edge mask so that the subsequent stpartialf can store the original (8-n-1)th through 8th bytes at appropriate address + // We need to execute this process for both the 8-byte result values + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, O5); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output); + __ delayed()->edge8n(to, G0, O3); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F54, to, 0); + __ retl(); + __ delayed()->stf(FloatRegisterImpl::D, F56, to, 8); + + __ BIND(L_store_misaligned_output); + __ add(to, 8, O4); + __ mov(8, O2); + __ sub(O2, O5, O2); + __ alignaddr(O2, G0, O2); + __ faligndata(F54, F54, F54); + __ faligndata(F56, F56, F56); + __ and3(to, -8, to); + __ and3(O4, -8, O4); + __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY); + __ add(to, 8, to); + __ add(O4, 8, O4); + __ orn(G0, O3, O3); + __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY); + __ retl(); + __ delayed()->stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY); + + return start; + } + + address generate_aescrypt_decryptBlock() { + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, + "the following code assumes that first element of an int array is aligned to 8 bytes"); + // required since we read original key 'byte' array as well in the decryption stubs + assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, + "the following code assumes that first element of a byte array is aligned to 8 bytes"); + __ 
align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + address start = __ pc(); + Label L_load_misaligned_input, L_load_original_key, L_expand192bit, L_expand256bit, L_reload_misaligned_input; + Label L_256bit_transform, L_common_transform, L_store_misaligned_output; + Register from = O0; // source byte array + Register to = O1; // destination byte array + Register key = O2; // expanded key array + Register original_key = O3; // original key array only required during decryption + const Register keylen = O4; // reg for storing expanded key array length + + // read expanded key array length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // save 'from' since we may need to recheck alignment in case of 256-bit decryption + __ mov(from, G1); + + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input); + __ delayed()->alignaddr(from, G0, from); + + // aligned case: load input into F52-F54 + __ ldf(FloatRegisterImpl::D, from, 0, F52); + __ ldf(FloatRegisterImpl::D, from, 8, F54); + __ ba_short(L_load_original_key); + + __ BIND(L_load_misaligned_input); + __ ldf(FloatRegisterImpl::D, from, 0, F52); + __ ldf(FloatRegisterImpl::D, from, 8, F54); + __ ldf(FloatRegisterImpl::D, from, 16, F56); + __ faligndata(F52, F54, F52); + __ faligndata(F54, F56, F54); + + __ BIND(L_load_original_key); + // load original key from SunJCE expanded decryption key + // Since we load original key buffer starting first element, 8-byte alignment is guaranteed + for ( int i = 0; i <= 3; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // 256-bit original key size + __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); + + // 192-bit original key size + __ 
cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); + + // 128-bit original key size + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 4 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); + } + + // perform 128-bit key specific inverse cipher transformation + __ fxor(FloatRegisterImpl::D, F42, F54, F54); + __ fxor(FloatRegisterImpl::D, F40, F52, F52); + __ ba_short(L_common_transform); + + __ BIND(L_expand192bit); + + // start loading rest of the 192-bit key + __ ldf(FloatRegisterImpl::S, original_key, 16, F4); + __ ldf(FloatRegisterImpl::S, original_key, 20, F5); + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 6 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); + __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); + } + __ aes_kexpand1(F42, F46, 7, F48); + __ aes_kexpand2(F44, F48, F50); + + // perform 192-bit key specific inverse cipher transformation + __ fxor(FloatRegisterImpl::D, F50, F54, F54); + __ fxor(FloatRegisterImpl::D, F48, F52, F52); + __ aes_dround23(F46, F52, F54, F58); + __ aes_dround01(F44, F52, F54, F56); + __ aes_dround23(F42, F56, F58, F54); + __ aes_dround01(F40, F56, F58, F52); + __ ba_short(L_common_transform); + + __ BIND(L_expand256bit); + + // load rest of the 256-bit key + for ( int i = 4; i <= 7; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 40; 
i += 8 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); + __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); + __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); + } + __ aes_kexpand1(F48, F54, 6, F56); + __ aes_kexpand2(F50, F56, F58); + + for ( int i = 0; i <= 6; i += 2 ) { + __ fsrc2(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i)); + } + + // reload original 'from' address + __ mov(G1, from); + + // re-check 8-byte alignment + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_reload_misaligned_input); + __ delayed()->alignaddr(from, G0, from); + + // aligned case: load input into F52-F54 + __ ldf(FloatRegisterImpl::D, from, 0, F52); + __ ldf(FloatRegisterImpl::D, from, 8, F54); + __ ba_short(L_256bit_transform); + + __ BIND(L_reload_misaligned_input); + __ ldf(FloatRegisterImpl::D, from, 0, F52); + __ ldf(FloatRegisterImpl::D, from, 8, F54); + __ ldf(FloatRegisterImpl::D, from, 16, F56); + __ faligndata(F52, F54, F52); + __ faligndata(F54, F56, F54); + + // perform 256-bit key specific inverse cipher transformation + __ BIND(L_256bit_transform); + __ fxor(FloatRegisterImpl::D, F0, F54, F54); + __ fxor(FloatRegisterImpl::D, F2, F52, F52); + __ aes_dround23(F4, F52, F54, F58); + __ aes_dround01(F6, F52, F54, F56); + __ aes_dround23(F50, F56, F58, F54); + __ aes_dround01(F48, F56, F58, F52); + __ aes_dround23(F46, F52, F54, F58); + __ aes_dround01(F44, F52, F54, F56); + __ aes_dround23(F42, F56, F58, F54); + __ aes_dround01(F40, F56, F58, F52); + + for ( int i = 0; i <= 7; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // perform inverse cipher transformations common for all key sizes + __ BIND(L_common_transform); + for ( int i = 38; i >= 6; i -= 8 ) { + __ 
aes_dround23(as_FloatRegister(i), F52, F54, F58); + __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56); + if ( i != 6) { + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52); + } + } + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, O5); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output); + __ delayed()->edge8n(to, G0, O3); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F52, to, 0); + __ retl(); + __ delayed()->stf(FloatRegisterImpl::D, F54, to, 8); + + __ BIND(L_store_misaligned_output); + __ add(to, 8, O4); + __ mov(8, O2); + __ sub(O2, O5, O2); + __ alignaddr(O2, G0, O2); + __ faligndata(F52, F52, F52); + __ faligndata(F54, F54, F54); + __ and3(to, -8, to); + __ and3(O4, -8, O4); + __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY); + __ add(to, 8, to); + __ add(O4, 8, O4); + __ orn(G0, O3, O3); + __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY); + __ retl(); + __ delayed()->stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY); + + return start; + } + + address generate_cipherBlockChaining_encryptAESCrypt() { + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, + "the following code assumes that first element of an int array is aligned to 8 bytes"); + assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, + "the following code assumes that first element of a byte array is aligned to 8 bytes"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + Label L_cbcenc128, L_load_misaligned_input_128bit, L_128bit_transform, L_store_misaligned_output_128bit; + Label 
L_check_loop_end_128bit, L_cbcenc192, L_load_misaligned_input_192bit, L_192bit_transform; + Label L_store_misaligned_output_192bit, L_check_loop_end_192bit, L_cbcenc256, L_load_misaligned_input_256bit; + Label L_256bit_transform, L_store_misaligned_output_256bit, L_check_loop_end_256bit; + address start = __ pc(); + Register from = I0; // source byte array + Register to = I1; // destination byte array + Register key = I2; // expanded key array + Register rvec = I3; // init vector + const Register len_reg = I4; // cipher length + const Register keylen = I5; // reg for storing expanded key array length + + __ save_frame(0); + // save cipher len to return in the end + __ mov(len_reg, L0); + + // read expanded key length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // load initial vector, 8-byte alignment is guaranteed + __ ldf(FloatRegisterImpl::D, rvec, 0, F60); + __ ldf(FloatRegisterImpl::D, rvec, 8, F62); + // load key, 8-byte alignment is guaranteed + __ ldx(key,0,G1); + __ ldx(key,8,G5); + + // start loading expanded key, 8-byte alignment is guaranteed + for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) { + __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); + } + + // 128-bit original key size + __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128); + + for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) { + __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); + } + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192); + + for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) { + __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); + } + + // 256-bit original key size + __ ba_short(L_cbcenc256); + + __ align(OptoLoopAlignment); + __ BIND(L_cbcenc128); + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 
7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_128bit); + __ delayed()->mov(from, L1); // save original 'from' address before alignaddr + + // aligned case: load input into G3 and G4 + __ ldx(from,0,G3); + __ ldx(from,8,G4); + __ ba_short(L_128bit_transform); + + __ BIND(L_load_misaligned_input_128bit); + // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption + __ alignaddr(from, G0, from); + __ ldf(FloatRegisterImpl::D, from, 0, F48); + __ ldf(FloatRegisterImpl::D, from, 8, F50); + __ ldf(FloatRegisterImpl::D, from, 16, F52); + __ faligndata(F48, F50, F48); + __ faligndata(F50, F52, F50); + __ movdtox(F48, G3); + __ movdtox(F50, G4); + __ mov(L1, from); + + __ BIND(L_128bit_transform); + __ xor3(G1,G3,G3); + __ xor3(G5,G4,G4); + __ movxtod(G3,F56); + __ movxtod(G4,F58); + __ fxor(FloatRegisterImpl::D, F60, F56, F60); + __ fxor(FloatRegisterImpl::D, F62, F58, F62); + + // TEN_EROUNDS + for ( int i = 0; i <= 32; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F60, F62, F56); + __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); + if (i != 32 ) { + __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); + } else { + __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); + } + } + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, L1); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_128bit); + __ delayed()->edge8n(to, G0, L2); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + __ ba_short(L_check_loop_end_128bit); + + __ BIND(L_store_misaligned_output_128bit); + __ add(to, 8, L3); + __ mov(8, L4); + __ sub(L4, L1, L4); + __ alignaddr(L4, G0, L4); + // save cipher text before 
circular right shift + // as it needs to be stored as iv for next block (see code before next ret) + __ movdtox(F60, L6); + __ movdtox(F62, L7); + __ faligndata(F60, F60, F60); + __ faligndata(F62, F62, F62); + __ mov(to, L5); + __ and3(to, -8, to); + __ and3(L3, -8, L3); + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); + __ add(to, 8, to); + __ add(L3, 8, L3); + __ orn(G0, L2, L2); + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); + __ mov(L5, to); + __ movxtod(L6, F60); + __ movxtod(L7, F62); + + __ BIND(L_check_loop_end_128bit); + __ add(from, 16, from); + __ add(to, 16, to); + __ subcc(len_reg, 16, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128); + __ delayed()->nop(); + // re-init initial vector for next block, 8-byte alignment is guaranteed + __ stf(FloatRegisterImpl::D, F60, rvec, 0); + __ stf(FloatRegisterImpl::D, F62, rvec, 8); + __ mov(L0, I0); + __ ret(); + __ delayed()->restore(); + + __ align(OptoLoopAlignment); + __ BIND(L_cbcenc192); + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_192bit); + __ delayed()->mov(from, L1); // save original 'from' address before alignaddr + + // aligned case: load input into G3 and G4 + __ ldx(from,0,G3); + __ ldx(from,8,G4); + __ ba_short(L_192bit_transform); + + __ BIND(L_load_misaligned_input_192bit); + // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption + __ alignaddr(from, G0, from); + __ ldf(FloatRegisterImpl::D, from, 0, F48); + __ ldf(FloatRegisterImpl::D, from, 8, F50); + __ ldf(FloatRegisterImpl::D, from, 16, F52); + __ faligndata(F48, F50, F48); + __ faligndata(F50, F52, F50); + __ movdtox(F48, G3); + __ movdtox(F50, G4); + __ mov(L1, from); + + __
BIND(L_192bit_transform); + __ xor3(G1,G3,G3); + __ xor3(G5,G4,G4); + __ movxtod(G3,F56); + __ movxtod(G4,F58); + __ fxor(FloatRegisterImpl::D, F60, F56, F60); + __ fxor(FloatRegisterImpl::D, F62, F58, F62); + + // TWELVE_EROUNDS + for ( int i = 0; i <= 40; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F60, F62, F56); + __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); + if (i != 40 ) { + __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); + } else { + __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); + } + } + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, L1); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_192bit); + __ delayed()->edge8n(to, G0, L2); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + __ ba_short(L_check_loop_end_192bit); + + __ BIND(L_store_misaligned_output_192bit); + __ add(to, 8, L3); + __ mov(8, L4); + __ sub(L4, L1, L4); + __ alignaddr(L4, G0, L4); + __ movdtox(F60, L6); + __ movdtox(F62, L7); + __ faligndata(F60, F60, F60); + __ faligndata(F62, F62, F62); + __ mov(to, L5); + __ and3(to, -8, to); + __ and3(L3, -8, L3); + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); + __ add(to, 8, to); + __ add(L3, 8, L3); + __ orn(G0, L2, L2); + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); + __ mov(L5, to); + __ movxtod(L6, F60); + __ movxtod(L7, F62); + + __ BIND(L_check_loop_end_192bit); + __ add(from, 16, from); + __ subcc(len_reg, 16, len_reg); + __ add(to, 16, to); + __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192); + __ delayed()->nop(); + // re-init initial
vector for next block, 8-byte alignment is guaranteed + __ stf(FloatRegisterImpl::D, F60, rvec, 0); + __ stf(FloatRegisterImpl::D, F62, rvec, 8); + __ mov(L0, I0); + __ ret(); + __ delayed()->restore(); + + __ align(OptoLoopAlignment); + __ BIND(L_cbcenc256); + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_256bit); + __ delayed()->mov(from, L1); // save original 'from' address before alignaddr + + // aligned case: load input into G3 and G4 + __ ldx(from,0,G3); + __ ldx(from,8,G4); + __ ba_short(L_256bit_transform); + + __ BIND(L_load_misaligned_input_256bit); + // cannot clobber F48, F50 and F52. F56, F58 can be used though + __ alignaddr(from, G0, from); + __ movdtox(F60, L2); // save F60 before overwriting + __ ldf(FloatRegisterImpl::D, from, 0, F56); + __ ldf(FloatRegisterImpl::D, from, 8, F58); + __ ldf(FloatRegisterImpl::D, from, 16, F60); + __ faligndata(F56, F58, F56); + __ faligndata(F58, F60, F58); + __ movdtox(F56, G3); + __ movdtox(F58, G4); + __ mov(L1, from); + __ movxtod(L2, F60); + + __ BIND(L_256bit_transform); + __ xor3(G1,G3,G3); + __ xor3(G5,G4,G4); + __ movxtod(G3,F56); + __ movxtod(G4,F58); + __ fxor(FloatRegisterImpl::D, F60, F56, F60); + __ fxor(FloatRegisterImpl::D, F62, F58, F62); + + // FOURTEEN_EROUNDS + for ( int i = 0; i <= 48; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F60, F62, F56); + __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); + if (i != 48 ) { + __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); + } else { + __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); + } + } + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, L1); + __ br(Assembler::notZero, 
true, Assembler::pn, L_store_misaligned_output_256bit); + __ delayed()->edge8n(to, G0, L2); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + __ ba_short(L_check_loop_end_256bit); + + __ BIND(L_store_misaligned_output_256bit); + __ add(to, 8, L3); + __ mov(8, L4); + __ sub(L4, L1, L4); + __ alignaddr(L4, G0, L4); + __ movdtox(F60, L6); + __ movdtox(F62, L7); + __ faligndata(F60, F60, F60); + __ faligndata(F62, F62, F62); + __ mov(to, L5); + __ and3(to, -8, to); + __ and3(L3, -8, L3); + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); + __ add(to, 8, to); + __ add(L3, 8, L3); + __ orn(G0, L2, L2); + __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); + __ mov(L5, to); + __ movxtod(L6, F60); + __ movxtod(L7, F62); + + __ BIND(L_check_loop_end_256bit); + __ add(from, 16, from); + __ subcc(len_reg, 16, len_reg); + __ add(to, 16, to); + __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256); + __ delayed()->nop(); + // re-init initial vector for next block, 8-byte alignment is guaranteed + __ stf(FloatRegisterImpl::D, F60, rvec, 0); + __ stf(FloatRegisterImpl::D, F62, rvec, 8); + __ mov(L0, I0); + __ ret(); + __ delayed()->restore(); + + return start; + } + + address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { + assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, + "the following code assumes that first element of an int array is aligned to 8 bytes"); + assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, + "the following code assumes that first element of a byte array is aligned to 8 bytes"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start; + Label 
L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256; + Label L_load_misaligned_input_first_block, L_transform_first_block, L_load_misaligned_next2_blocks128, L_transform_next2_blocks128; + Label L_load_misaligned_next2_blocks192, L_transform_next2_blocks192, L_load_misaligned_next2_blocks256, L_transform_next2_blocks256; + Label L_store_misaligned_output_first_block, L_check_decrypt_end, L_store_misaligned_output_next2_blocks128; + Label L_check_decrypt_loop_end128, L_store_misaligned_output_next2_blocks192, L_check_decrypt_loop_end192; + Label L_store_misaligned_output_next2_blocks256, L_check_decrypt_loop_end256; + address start = __ pc(); + Register from = I0; // source byte array + Register to = I1; // destination byte array + Register key = I2; // expanded key array + Register rvec = I3; // init vector + const Register len_reg = I4; // cipher length + const Register original_key = I5; // original key array only required during decryption + const Register keylen = L6; // reg for storing expanded key array length + + __ save_frame(0); //args are read from I* registers since we save the frame in the beginning + // save cipher len to return in the end + __ mov(len_reg, L7); + + // load original key from SunJCE expanded decryption key + // Since we load original key buffer starting first element, 8-byte alignment is guaranteed + for ( int i = 0; i <= 3; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // load initial vector, 8-byte alignment is guaranteed + __ ldx(rvec,0,L0); + __ ldx(rvec,8,L1); + + // read expanded key array length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // 256-bit original key size + __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, 
L_expand192bit); + + // 128-bit original key size + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 4 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); + } + + // load expanded key[last-1] and key[last] elements + __ movdtox(F40,L2); + __ movdtox(F42,L3); + + __ and3(len_reg, 16, L4); + __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks128); + __ nop(); + + __ ba_short(L_dec_first_block_start); + + __ BIND(L_expand192bit); + // load rest of the 192-bit key + __ ldf(FloatRegisterImpl::S, original_key, 16, F4); + __ ldf(FloatRegisterImpl::S, original_key, 20, F5); + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 6 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); + __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); + } + __ aes_kexpand1(F42, F46, 7, F48); + __ aes_kexpand2(F44, F48, F50); + + // load expanded key[last-1] and key[last] elements + __ movdtox(F48,L2); + __ movdtox(F50,L3); + + __ and3(len_reg, 16, L4); + __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks192); + __ nop(); + + __ ba_short(L_dec_first_block_start); + + __ BIND(L_expand256bit); + // load rest of the 256-bit key + for ( int i = 4; i <= 7; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 40; i += 8 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); + __ 
aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); + __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); + __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); + } + __ aes_kexpand1(F48, F54, 6, F56); + __ aes_kexpand2(F50, F56, F58); + + // load expanded key[last-1] and key[last] elements + __ movdtox(F56,L2); + __ movdtox(F58,L3); + + __ and3(len_reg, 16, L4); + __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks256); + + __ BIND(L_dec_first_block_start); + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_first_block); + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr + + // aligned case: load input into L4 and L5 + __ ldx(from,0,L4); + __ ldx(from,8,L5); + __ ba_short(L_transform_first_block); + + __ BIND(L_load_misaligned_input_first_block); + __ alignaddr(from, G0, from); + // F58, F60, F62 can be clobbered + __ ldf(FloatRegisterImpl::D, from, 0, F58); + __ ldf(FloatRegisterImpl::D, from, 8, F60); + __ ldf(FloatRegisterImpl::D, from, 16, F62); + __ faligndata(F58, F60, F58); + __ faligndata(F60, F62, F60); + __ movdtox(F58, L4); + __ movdtox(F60, L5); + __ mov(G1, from); + + __ BIND(L_transform_first_block); + __ xor3(L2,L4,G1); + __ movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ movxtod(G1,F62); + + // 128-bit original key size + __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192); + + __ aes_dround23(F54, F60, F62, F58); + __ aes_dround01(F52, F60, F62, F56); + __ aes_dround23(F50, F56, F58, F62); + __ aes_dround01(F48, F56, F58, F60); + + __ BIND(L_dec_first_block192); + __ aes_dround23(F46, F60, F62, F58); + 
__ aes_dround01(F44, F60, F62, F56); + __ aes_dround23(F42, F56, F58, F62); + __ aes_dround01(F40, F56, F58, F60); + + __ BIND(L_dec_first_block128); + for ( int i = 38; i >= 6; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + if ( i != 6) { + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); + } + } + + __ movxtod(L0,F56); + __ movxtod(L1,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, G1); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_first_block); + __ delayed()->edge8n(to, G0, G2); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + __ ba_short(L_check_decrypt_end); + + __ BIND(L_store_misaligned_output_first_block); + __ add(to, 8, G3); + __ mov(8, G4); + __ sub(G4, G1, G4); + __ alignaddr(G4, G0, G4); + __ faligndata(F60, F60, F60); + __ faligndata(F62, F62, F62); + __ mov(to, G1); + __ and3(to, -8, to); + __ and3(G3, -8, G3); + __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY); + __ add(to, 8, to); + __ add(G3, 8, G3); + __ orn(G0, G2, G2); + __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY); + __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY); + __ mov(G1, to); + + __ BIND(L_check_decrypt_end); + __ add(from, 16, from); + __ add(to, 16, to); + __ subcc(len_reg, 16, len_reg); + __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end); + __ delayed()->nop(); + + // 256-bit 
original key size + __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192); + + __ align(OptoLoopAlignment); + __ BIND(L_dec_next2_blocks128); + __ nop(); + + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks128); + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr + + // aligned case: load input into G4, G5, L4 and L5 + __ ldx(from,0,G4); + __ ldx(from,8,G5); + __ ldx(from,16,L4); + __ ldx(from,24,L5); + __ ba_short(L_transform_next2_blocks128); + + __ BIND(L_load_misaligned_next2_blocks128); + __ alignaddr(from, G0, from); + // F40, F42, F58, F60, F62 can be clobbered + __ ldf(FloatRegisterImpl::D, from, 0, F40); + __ ldf(FloatRegisterImpl::D, from, 8, F42); + __ ldf(FloatRegisterImpl::D, from, 16, F60); + __ ldf(FloatRegisterImpl::D, from, 24, F62); + __ ldf(FloatRegisterImpl::D, from, 32, F58); + __ faligndata(F40, F42, F40); + __ faligndata(F42, F60, F42); + __ faligndata(F60, F62, F60); + __ faligndata(F62, F58, F62); + __ movdtox(F40, G4); + __ movdtox(F42, G5); + __ movdtox(F60, L4); + __ movdtox(F62, L5); + __ mov(G1, from); + + __ BIND(L_transform_next2_blocks128); + // F40:F42 used for first 16-bytes + __ xor3(L2,G4,G1); + __ movxtod(G1,F40); + __ xor3(L3,G5,G1); + __ movxtod(G1,F42); + + // F60:F62 used for next 16-bytes + __ xor3(L2,L4,G1); + __ movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ movxtod(G1,F62); + + for ( int i = 38; i >= 6; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F40, F42, F44); + __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46); + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + if (i != 6 ) { + __ 
aes_dround23(as_FloatRegister(i-4), F46, F44, F42); + __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40); + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42); + __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40); + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); + } + } + + __ movxtod(L0,F46); + __ movxtod(L1,F44); + __ fxor(FloatRegisterImpl::D, F46, F40, F40); + __ fxor(FloatRegisterImpl::D, F44, F42, F42); + + __ movxtod(G4,F56); + __ movxtod(G5,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + // For mis-aligned store of 32 bytes of result we can do: + // Circular right-shift all 4 FP registers so that 'head' and 'tail' + // parts that need to be stored starting at mis-aligned address are in a FP reg + // the other 3 FP regs can thus be stored using regular store + // we then use the edge + partial-store mechanism to store the 'head' and 'tail' parts + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, G1); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks128); + __ delayed()->edge8n(to, G0, G2); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F40, to, 0); + __ stf(FloatRegisterImpl::D, F42, to, 8); + __ stf(FloatRegisterImpl::D, F60, to, 16); + __ stf(FloatRegisterImpl::D, F62, to, 24); + __ ba_short(L_check_decrypt_loop_end128); + + __ BIND(L_store_misaligned_output_next2_blocks128); + __ mov(8, G4); + __ sub(G4, G1, G4); + __ alignaddr(G4, G0, G4); + __ faligndata(F40, F42, F56); // F56 can be clobbered + __ faligndata(F42, F60, F42); + __ faligndata(F60, F62, F60); + __ faligndata(F62, F40, F40); + __ mov(to, 
G1); + __ and3(to, -8, to); + __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY); + __ stf(FloatRegisterImpl::D, F56, to, 8); + __ stf(FloatRegisterImpl::D, F42, to, 16); + __ stf(FloatRegisterImpl::D, F60, to, 24); + __ add(to, 32, to); + __ orn(G0, G2, G2); + __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY); + __ mov(G1, to); + + __ BIND(L_check_decrypt_loop_end128); + __ add(from, 32, from); + __ add(to, 32, to); + __ subcc(len_reg, 32, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128); + __ delayed()->nop(); + __ ba_short(L_cbcdec_end); + + __ align(OptoLoopAlignment); + __ BIND(L_dec_next2_blocks192); + __ nop(); + + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks192); + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr + + // aligned case: load input into G4, G5, L4 and L5 + __ ldx(from,0,G4); + __ ldx(from,8,G5); + __ ldx(from,16,L4); + __ ldx(from,24,L5); + __ ba_short(L_transform_next2_blocks192); + + __ BIND(L_load_misaligned_next2_blocks192); + __ alignaddr(from, G0, from); + // F48, F50, F52, F60, F62 can be clobbered + __ ldf(FloatRegisterImpl::D, from, 0, F48); + __ ldf(FloatRegisterImpl::D, from, 8, F50); + __ ldf(FloatRegisterImpl::D, from, 16, F60); + __ ldf(FloatRegisterImpl::D, from, 24, F62); + __ ldf(FloatRegisterImpl::D, from, 32, F52); + __ faligndata(F48, F50, F48); + __ faligndata(F50, F60, F50); + __ faligndata(F60, F62, F60); + __ faligndata(F62, F52, F62); + __ movdtox(F48, G4); + __ movdtox(F50, G5); + __ movdtox(F60, L4); + __ movdtox(F62, L5); + __ mov(G1, from); + + __ BIND(L_transform_next2_blocks192); + // F48:F50 used for first 16-bytes + __ xor3(L2,G4,G1); + __ movxtod(G1,F48); + __ xor3(L3,G5,G1); + __ movxtod(G1,F50); + + // F60:F62 used for next 16-bytes + __ xor3(L2,L4,G1); + __ 
movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ movxtod(G1,F62); + + for ( int i = 46; i >= 6; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F48, F50, F52); + __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54); + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + if (i != 6 ) { + __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50); + __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48); + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50); + __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48); + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); + } + } + + __ movxtod(L0,F54); + __ movxtod(L1,F52); + __ fxor(FloatRegisterImpl::D, F54, F48, F48); + __ fxor(FloatRegisterImpl::D, F52, F50, F50); + + __ movxtod(G4,F56); + __ movxtod(G5,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, G1); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks192); + __ delayed()->edge8n(to, G0, G2); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F48, to, 0); + __ stf(FloatRegisterImpl::D, F50, to, 8); + __ stf(FloatRegisterImpl::D, F60, to, 16); + __ stf(FloatRegisterImpl::D, F62, to, 24); + __ ba_short(L_check_decrypt_loop_end192); + + __ BIND(L_store_misaligned_output_next2_blocks192); + __ mov(8, G4); + __ sub(G4, G1, G4); + __ alignaddr(G4, G0, G4); + __ faligndata(F48, F50, F56); // F56 can be clobbered + __ faligndata(F50, F60, F50); + __ faligndata(F60, F62, F60); + __ faligndata(F62, F48, F48); + __ mov(to, G1); + __ and3(to, 
-8, to); + __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY); + __ stf(FloatRegisterImpl::D, F56, to, 8); + __ stf(FloatRegisterImpl::D, F50, to, 16); + __ stf(FloatRegisterImpl::D, F60, to, 24); + __ add(to, 32, to); + __ orn(G0, G2, G2); + __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY); + __ mov(G1, to); + + __ BIND(L_check_decrypt_loop_end192); + __ add(from, 32, from); + __ add(to, 32, to); + __ subcc(len_reg, 32, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192); + __ delayed()->nop(); + __ ba_short(L_cbcdec_end); + + __ align(OptoLoopAlignment); + __ BIND(L_dec_next2_blocks256); + __ nop(); + + // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero + __ andcc(from, 7, G0); + __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks256); + __ delayed()->mov(from, G1); // save original 'from' address before alignaddr + + // aligned case: load input into G4, G5, L4 and L5 + __ ldx(from,0,G4); + __ ldx(from,8,G5); + __ ldx(from,16,L4); + __ ldx(from,24,L5); + __ ba_short(L_transform_next2_blocks256); + + __ BIND(L_load_misaligned_next2_blocks256); + __ alignaddr(from, G0, from); + // F0, F2, F4, F60, F62 can be clobbered + __ ldf(FloatRegisterImpl::D, from, 0, F0); + __ ldf(FloatRegisterImpl::D, from, 8, F2); + __ ldf(FloatRegisterImpl::D, from, 16, F60); + __ ldf(FloatRegisterImpl::D, from, 24, F62); + __ ldf(FloatRegisterImpl::D, from, 32, F4); + __ faligndata(F0, F2, F0); + __ faligndata(F2, F60, F2); + __ faligndata(F60, F62, F60); + __ faligndata(F62, F4, F62); + __ movdtox(F0, G4); + __ movdtox(F2, G5); + __ movdtox(F60, L4); + __ movdtox(F62, L5); + __ mov(G1, from); + + __ BIND(L_transform_next2_blocks256); + // F0:F2 used for first 16-bytes + __ xor3(L2,G4,G1); + __ movxtod(G1,F0); + __ xor3(L3,G5,G1); + __ movxtod(G1,F2); + + // F60:F62 used for next 16-bytes + __ xor3(L2,L4,G1); + __ movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ 
movxtod(G1,F62); + + __ aes_dround23(F54, F0, F2, F4); + __ aes_dround01(F52, F0, F2, F6); + __ aes_dround23(F54, F60, F62, F58); + __ aes_dround01(F52, F60, F62, F56); + __ aes_dround23(F50, F6, F4, F2); + __ aes_dround01(F48, F6, F4, F0); + __ aes_dround23(F50, F56, F58, F62); + __ aes_dround01(F48, F56, F58, F60); + // save F48:F54 in temp registers + __ movdtox(F54,G2); + __ movdtox(F52,G3); + __ movdtox(F50,L6); + __ movdtox(F48,G1); + for ( int i = 46; i >= 14; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F0, F2, F4); + __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6); + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2); + __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0); + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } + // init F48:F54 with F0:F6 values (original key) + __ ldf(FloatRegisterImpl::D, original_key, 0, F48); + __ ldf(FloatRegisterImpl::D, original_key, 8, F50); + __ ldf(FloatRegisterImpl::D, original_key, 16, F52); + __ ldf(FloatRegisterImpl::D, original_key, 24, F54); + __ aes_dround23(F54, F0, F2, F4); + __ aes_dround01(F52, F0, F2, F6); + __ aes_dround23(F54, F60, F62, F58); + __ aes_dround01(F52, F60, F62, F56); + __ aes_dround23_l(F50, F6, F4, F2); + __ aes_dround01_l(F48, F6, F4, F0); + __ aes_dround23_l(F50, F56, F58, F62); + __ aes_dround01_l(F48, F56, F58, F60); + // re-init F48:F54 with their original values + __ movxtod(G2,F54); + __ movxtod(G3,F52); + __ movxtod(L6,F50); + __ movxtod(G1,F48); + + __ movxtod(L0,F6); + __ movxtod(L1,F4); + __ fxor(FloatRegisterImpl::D, F6, F0, F0); + __ fxor(FloatRegisterImpl::D, F4, F2, F2); + + __ movxtod(G4,F56); + __ movxtod(G5,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + // check for 8-byte alignment since dest byte 
array may have arbitrary alignment if offset mod 8 is non-zero + __ andcc(to, 7, G1); + __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks256); + __ delayed()->edge8n(to, G0, G2); + + // aligned case: store output into the destination array + __ stf(FloatRegisterImpl::D, F0, to, 0); + __ stf(FloatRegisterImpl::D, F2, to, 8); + __ stf(FloatRegisterImpl::D, F60, to, 16); + __ stf(FloatRegisterImpl::D, F62, to, 24); + __ ba_short(L_check_decrypt_loop_end256); + + __ BIND(L_store_misaligned_output_next2_blocks256); + __ mov(8, G4); + __ sub(G4, G1, G4); + __ alignaddr(G4, G0, G4); + __ faligndata(F0, F2, F56); // F56 can be clobbered + __ faligndata(F2, F60, F2); + __ faligndata(F60, F62, F60); + __ faligndata(F62, F0, F0); + __ mov(to, G1); + __ and3(to, -8, to); + __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY); + __ stf(FloatRegisterImpl::D, F56, to, 8); + __ stf(FloatRegisterImpl::D, F2, to, 16); + __ stf(FloatRegisterImpl::D, F60, to, 24); + __ add(to, 32, to); + __ orn(G0, G2, G2); + __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY); + __ mov(G1, to); + + __ BIND(L_check_decrypt_loop_end256); + __ add(from, 32, from); + __ add(to, 32, to); + __ subcc(len_reg, 32, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256); + __ delayed()->nop(); + + __ BIND(L_cbcdec_end); + // re-init initial vector for next block, 8-byte alignment is guaranteed + __ stx(L0, rvec, 0); + __ stx(L1, rvec, 8); + __ mov(L7, I0); + __ ret(); + __ delayed()->restore(); + + return start; + } + + address generate_sha1_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_sha1_loop, L_sha1_unaligned_input, L_sha1_unaligned_input_loop; + int i; + + Register buf = O0; // byte[] source+offset + Register state = O1; // int[] SHA.state + Register ofs = O2; // int offset + Register limit = O3; // int limit + + 
// load state into F0-F4 + for (i = 0; i < 5; i++) { + __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i)); + } + + __ andcc(buf, 7, G0); + __ br(Assembler::notZero, false, Assembler::pn, L_sha1_unaligned_input); + __ delayed()->nop(); + + __ BIND(L_sha1_loop); + // load buf into F8-F22 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + __ sha1(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F4 into state and return + for (i = 0; i < 4; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10); + + __ BIND(L_sha1_unaligned_input); + __ alignaddr(buf, G0, buf); + + __ BIND(L_sha1_unaligned_input_loop); + // load buf into F8-F22 + for (i = 0; i < 9; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + for (i = 0; i < 8; i++) { + __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8)); + } + __ sha1(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_unaligned_input_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F4 into state and return + for (i = 0; i < 4; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10); + + return start; + } + + address generate_sha256_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_sha256_loop, L_sha256_unaligned_input, L_sha256_unaligned_input_loop; + int i; + + Register buf = O0; // byte[] source+offset + Register state = O1; // 
int[] SHA2.state + Register ofs = O2; // int offset + Register limit = O3; // int limit + + // load state into F0-F7 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i)); + } + + __ andcc(buf, 7, G0); + __ br(Assembler::notZero, false, Assembler::pn, L_sha256_unaligned_input); + __ delayed()->nop(); + + __ BIND(L_sha256_loop); + // load buf into F8-F22 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + __ sha256(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F7 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c); + + __ BIND(L_sha256_unaligned_input); + __ alignaddr(buf, G0, buf); + + __ BIND(L_sha256_unaligned_input_loop); + // load buf into F8-F22 + for (i = 0; i < 9; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + for (i = 0; i < 8; i++) { + __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8)); + } + __ sha256(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_unaligned_input_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F7 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c); + + return start; + } + + address generate_sha512_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_sha512_loop, L_sha512_unaligned_input, 
L_sha512_unaligned_input_loop; + int i; + + Register buf = O0; // byte[] source+offset + Register state = O1; // long[] SHA5.state + Register ofs = O2; // int offset + Register limit = O3; // int limit + + // load state into F0-F14 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::D, state, i*8, as_FloatRegister(i*2)); + } + + __ andcc(buf, 7, G0); + __ br(Assembler::notZero, false, Assembler::pn, L_sha512_unaligned_input); + __ delayed()->nop(); + + __ BIND(L_sha512_loop); + // load buf into F16-F46 + for (i = 0; i < 16; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16)); + } + __ sha512(); + if (multi_block) { + __ add(ofs, 128, ofs); + __ add(buf, 128, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F14 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38); + + __ BIND(L_sha512_unaligned_input); + __ alignaddr(buf, G0, buf); + + __ BIND(L_sha512_unaligned_input_loop); + // load buf into F16-F46 + for (i = 0; i < 17; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16)); + } + for (i = 0; i < 16; i++) { + __ faligndata(as_FloatRegister(i*2 + 16), as_FloatRegister(i*2 + 18), as_FloatRegister(i*2 + 16)); + } + __ sha512(); + if (multi_block) { + __ add(ofs, 128, ofs); + __ add(buf, 128, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_unaligned_input_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F14 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38); + + return start; + } + + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + __ 
align(CodeEntryAlignment); + Label L_ghash_loop, L_aligned, L_main; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + Register state = I0; + Register subkeyH = I1; + Register data = I2; + Register len = I3; + + __ save_frame(0); + + __ ldx(state, 0, O0); + __ ldx(state, 8, O1); + + // Loop label for multiblock operations + __ BIND(L_ghash_loop); + + // Check if 'data' is unaligned + __ andcc(data, 7, G1); + __ br(Assembler::zero, false, Assembler::pt, L_aligned); + __ delayed()->nop(); + + Register left_shift = L1; + Register right_shift = L2; + Register data_ptr = L3; + + // Get left and right shift values in bits + __ sll(G1, LogBitsPerByte, left_shift); + __ mov(64, right_shift); + __ sub(right_shift, left_shift, right_shift); + + // Align to read 'data' + __ sub(data, G1, data_ptr); + + // Load first 8 bytes of 'data' + __ ldx(data_ptr, 0, O4); + __ sllx(O4, left_shift, O4); + __ ldx(data_ptr, 8, O5); + __ srlx(O5, right_shift, G4); + __ bset(G4, O4); + + // Load second 8 bytes of 'data' + __ sllx(O5, left_shift, O5); + __ ldx(data_ptr, 16, G4); + __ srlx(G4, right_shift, G4); + __ ba(L_main); + __ delayed()->bset(G4, O5); + + // If 'data' is aligned, load normally + __ BIND(L_aligned); + __ ldx(data, 0, O4); + __ ldx(data, 8, O5); + + __ BIND(L_main); + __ ldx(subkeyH, 0, O2); + __ ldx(subkeyH, 8, O3); + + __ xor3(O0, O4, O0); + __ xor3(O1, O5, O1); + + __ xmulxhi(O0, O3, G3); + __ xmulx(O0, O2, O5); + __ xmulxhi(O1, O2, G4); + __ xmulxhi(O1, O3, G5); + __ xmulx(O0, O3, G1); + __ xmulx(O1, O3, G2); + __ xmulx(O1, O2, O3); + __ xmulxhi(O0, O2, O4); + + __ mov(0xE1, O0); + __ sllx(O0, 56, O0); + + __ xor3(O5, G3, O5); + __ xor3(O5, G4, O5); + __ xor3(G5, G1, G1); + __ xor3(G1, O3, G1); + __ srlx(G2, 63, O1); + __ srlx(G1, 63, G3); + __ sllx(G2, 63, O3); + __ sllx(G2, 58, O2); + __ xor3(O3, O2, O2); + + __ sllx(G1, 1, G1); + __ or3(G1, O1, G1); + + __ xor3(G1, O2, G1); + + __ sllx(G2, 1, G2); + + __ xmulxhi(G1, O0, 
O1); + __ xmulx(G1, O0, O2); + __ xmulxhi(G2, O0, O3); + __ xmulx(G2, O0, G1); + + __ xor3(O4, O1, O4); + __ xor3(O5, O2, O5); + __ xor3(O5, O3, O5); + + __ sllx(O4, 1, O2); + __ srlx(O5, 63, O3); + + __ or3(O2, O3, O0); + + __ sllx(O5, 1, O1); + __ srlx(G1, 63, O2); + __ or3(O1, O2, O1); + __ xor3(O1, G3, O1); + + __ deccc(len); + __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop); + __ delayed()->add(data, 16, data); + + __ stx(O0, I0, 0); + __ stx(O1, I0, 8); + + __ ret(); + __ delayed()->restore(); + + return start; + } + + /** + * Arguments: + * + * Inputs: + * O0 - int crc + * O1 - byte* buf + * O2 - int len + * O3 - int* table + * + * Output: + * O0 - int crc result + */ + address generate_updateBytesCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); + address start = __ pc(); + + const Register crc = O0; // crc + const Register buf = O1; // source java byte array address + const Register len = O2; // number of bytes + const Register table = O3; // byteTable + + __ kernel_crc32c(crc, buf, len, table); + + __ retl(); + __ delayed()->nop(); + + return start; + } + +#define ADLER32_NUM_TEMPS 16 + + /** + * Arguments: + * + * Inputs: + * O0 - int adler + * O1 - byte* buff + * O2 - int len + * + * Output: + * O0 - int adler result + */ + address generate_updateBytesAdler32() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32"); + address start = __ pc(); + + Label L_cleanup_loop, L_cleanup_loop_check; + Label L_main_loop_check, L_main_loop, L_inner_loop, L_inner_loop_check; + Label L_nmax_check_done; + + // Aliases + Register s1 = O0; + Register s2 = O3; + Register buff = O1; + Register len = O2; + Register temp[ADLER32_NUM_TEMPS] = {L0, L1, L2, L3, L4, L5, L6, L7, I0, I1, I2, I3, I4, I5, G3, I7}; + + // Max number of bytes we can process before having to take the mod + // 0x15B0 is 5552 in 
decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 + unsigned long NMAX = 0x15B0; + + // Zero-out the upper bits of len + __ clruwu(len); + + // Create the mask 0xFFFF + __ set64(0x00FFFF, O4, O5); // O5 is the temp register + + // s1 is initialized to the lower 16 bits of adler + // s2 is initialized to the upper 16 bits of adler + __ srlx(O0, 16, O5); // adler >> 16 + __ and3(O0, O4, s1); // s1 = (adler & 0xFFFF) + __ and3(O5, O4, s2); // s2 = ((adler >> 16) & 0xFFFF) + + // The pipelined loop needs at least 16 elements for 1 iteration + // It does check this, but it is more effective to skip to the cleanup loop + // Setup the constant for cutoff checking + __ mov(15, O4); + + // Check if we are above the cutoff, if not go to the cleanup loop immediately + __ cmp_and_br_short(len, O4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_loop_check); + + // Free up some registers for our use + for (int i = 0; i < ADLER32_NUM_TEMPS; i++) { + __ movxtod(temp[i], as_FloatRegister(2*i)); + } + + // Loop maintenance stuff is done at the end of the loop, so skip to there + __ ba_short(L_main_loop_check); + + __ BIND(L_main_loop); + + // Prologue for inner loop + __ ldub(buff, 0, L0); + __ dec(O5); + + for (int i = 1; i < 8; i++) { + __ ldub(buff, i, temp[i]); + } + + __ inc(buff, 8); + + // Inner loop processes 16 elements at a time, might never execute if only 16 elements + // to be processed by the outer loop + __ ba_short(L_inner_loop_check); + + __ BIND(L_inner_loop); + + for (int i = 0; i < 8; i++) { + __ ldub(buff, (2*i), temp[(8+(2*i)) % ADLER32_NUM_TEMPS]); + __ add(s1, temp[i], s1); + __ ldub(buff, (2*i)+1, temp[(8+(2*i)+1) % ADLER32_NUM_TEMPS]); + __ add(s2, s1, s2); + } + + // Original temp 0-7 used and new loads to temp 0-7 issued + // temp 8-15 ready to be consumed + __ add(s1, I0, s1); + __ dec(O5); + __ add(s2, s1, s2); + __ add(s1, I1, s1); + __ inc(buff, 16); + __ add(s2, s1, s2); + + for (int i = 0; i < 6; i++) { + __ add(s1, 
temp[10+i], s1); + __ add(s2, s1, s2); + } + + __ BIND(L_inner_loop_check); + __ nop(); + __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_inner_loop); + + // Epilogue + for (int i = 0; i < 4; i++) { + __ ldub(buff, (2*i), temp[8+(2*i)]); + __ add(s1, temp[i], s1); + __ ldub(buff, (2*i)+1, temp[8+(2*i)+1]); + __ add(s2, s1, s2); + } + + __ add(s1, temp[4], s1); + __ inc(buff, 8); + + for (int i = 0; i < 11; i++) { + __ add(s2, s1, s2); + __ add(s1, temp[5+i], s1); + } + + __ add(s2, s1, s2); + + // Take the mod for s1 and s2: x mod m is computed as x - (x / m) * m, with m = 0xFFF1 (65521) + __ set64(0xFFF1, L0, L1); + __ udivx(s1, L0, L1); + __ udivx(s2, L0, L2); + __ mulx(L0, L1, L1); + __ mulx(L0, L2, L2); + __ sub(s1, L1, s1); + __ sub(s2, L2, s2); + + // Make sure there is something left to process + __ BIND(L_main_loop_check); + __ set64(NMAX, L0, L1); + // k = len < NMAX ? len : NMAX + __ cmp_and_br_short(len, L0, Assembler::greaterEqualUnsigned, Assembler::pt, L_nmax_check_done); + __ andn(len, 0x0F, L0); // only loop a multiple of 16 times + __ BIND(L_nmax_check_done); + __ mov(L0, O5); + __ sub(len, L0, len); // len -= k + + __ srlx(O5, 4, O5); // O5 = k >> 4: number of multiples of 16 + __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_main_loop); + + // Restore anything we used, take the mod one last time, combine and return + // Restore any registers we saved + for (int i = 0; i < ADLER32_NUM_TEMPS; i++) { + __ movdtox(as_FloatRegister(2*i), temp[i]); + } + + // There might be nothing left to process + __ ba_short(L_cleanup_loop_check); + + __ BIND(L_cleanup_loop); + __ ldub(buff, 0, O4); // load single byte from buffer + __ inc(buff); // buff++ + __ add(s1, O4, s1); // s1 += *buff++; + __ dec(len); // len-- + __ add(s1, s2, s2); // s2 += s1; + __ BIND(L_cleanup_loop_check); + __ nop(); + __ cmp_and_br_short(len, 0, Assembler::notEqual, Assembler::pt, L_cleanup_loop); + + // Take the mod one last time + __ set64(0xFFF1, O1, O2); + __ udivx(s1, O1, O2); + __ udivx(s2, O1, O5); + __ mulx(O1, O2, O2); + __ 
mulx(O1, O5, O5); + __ sub(s1, O2, s1); + __ sub(s2, O5, s2); + + // Combine lower bits and higher bits + __ sllx(s2, 16, s2); // s2 = s2 << 16 + __ or3(s1, s2, s1); // adler = s2 | s1 + // Final return value is in O0 + __ retl(); + __ delayed()->nop(); + + return start; + } + + /** + * Arguments: + * + * Inputs: + * O0 - int crc + * O1 - byte* buf + * O2 - int len + * O3 - int* table + * + * Output: + * O0 - int crc result + */ + address generate_updateBytesCRC32() { + assert(UseCRC32Intrinsics, "need VIS3 instructions"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); + address start = __ pc(); + + const Register crc = O0; // crc + const Register buf = O1; // source java byte array address + const Register len = O2; // length + const Register table = O3; // crc_table address (reuse register) + + __ kernel_crc32(crc, buf, len, table); + + __ retl(); + __ delayed()->nop(); + + return start; + } + + /** + * Arguments: + * + * Inputs: + * I0 - int* x-addr + * I1 - int x-len + * I2 - int* y-addr + * I3 - int y-len + * I4 - int* z-addr (output vector) + * I5 - int z-len + */ + address generate_multiplyToLen() { + assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + address start = __ pc(); + + __ save_frame(0); + + const Register xptr = I0; // input address + const Register xlen = I1; // ...and length in 32b-words + const Register yptr = I2; // second input address + const Register ylen = I3; // ...and length in 32b-words + const Register zptr = I4; // output address + const Register zlen = I5; // ...and length in 32b-words + + /* The minimal "limb" representation suggests that odd length vectors are as + * likely as even length dittos. This in turn suggests that we need to cope + * with odd/even length arrays and data not aligned properly for 64-bit read + * and write operations. 
We thus use a number of different kernels: + * + * if (is_even(x.len) && is_even(y.len)) + * if (is_align64(x) && is_align64(y) && is_align64(z)) + * if (x.len == y.len && 16 <= x.len && x.len <= 64) + * memv_mult_mpmul(...) + * else + * memv_mult_64x64(...) + * else + * memv_mult_64x64u(...) + * else + * memv_mult_32x32(...) + * + * Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc'). + * In case CBCOND instructions are supported, we will use 'cxbX'. If the + * MPMUL instruction is supported, we will generate a kernel using 'mpmul' + * (for vectors with proper characteristics). + */ + const Register tmp0 = L0; + const Register tmp1 = L1; + + Label L_mult_32x32; + Label L_mult_64x64u; + Label L_mult_64x64; + Label L_exit; + + if_both_even(xlen, ylen, tmp0, false, L_mult_32x32); + if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u); + + if (UseMPMUL) { + if_eq(xlen, ylen, false, L_mult_64x64); + if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64); + + // 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel, + // operating on equal length vectors of size [16..64]. + gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit); + } + + // 2. Multiply naturally aligned 64-bit datums (64x64). + __ bind(L_mult_64x64); + gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit); + + // 3. Multiply unaligned 64-bit datums (64x64). + __ bind(L_mult_64x64u); + gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit); + + // 4. Multiply naturally aligned 32-bit datums (32x32). + __ bind(L_mult_32x32); + gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit); + + __ bind(L_exit); + __ ret(); + __ delayed()->restore(); + + return start; + } + + // Additional help functions used by multiplyToLen generation. 
+ /* Emit a parity test on (r1 | r2): the OR is written to tmp and masked with 0x1 + * by andcc; br_icc_zero(iseven, Assembler::pn, L) then conditionally branches + * to L. tmp is clobbered. + * NOTE(review): br_icc_zero is defined outside this section; presumably it + * branches when the zero condition matches iseven - confirm. + */ + void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L) + { + __ or3(r1, r2, tmp); + __ andcc(tmp, 0x1, tmp); + __ br_icc_zero(iseven, Assembler::pn, L); + } + /* Emit a combined alignment test on r1, r2 and r3: the three values are OR-ed + * into tmp and masked with (align - 1), so align is only meaningful as a power + * of two; br_icc_zero(isalign, Assembler::pn, L) then conditionally branches + * to L. tmp is clobbered. + * NOTE(review): generate_multiplyToLen passes align = 64, i.e. mask 0x3F on the + * three addresses, while its kernel-selection comment speaks of data aligned + * for 64-bit accesses - confirm the intended unit. + */ + void if_all3_aligned(Register r1, Register r2, Register r3, + Register tmp, uint align, bool isalign, Label &L) + { + __ or3(r1, r2, tmp); + __ or3(r3, tmp, tmp); + __ andcc(tmp, (align - 1), tmp); + __ br_icc_zero(isalign, Assembler::pn, L); + } + /* Emit a compare-and-branch to L, taken on x == y when iseq is true and on + * x != y when iseq is false. + */ + void if_eq(Register x, Register y, bool iseq, Label &L) + { + Assembler::Condition cf = (iseq ? Assembler::equal : Assembler::notEqual); + __ cmp_and_br_short(x, y, cf, Assembler::pt, L); + } + /* Emit a range test of x against [lb, ub], evaluated through the sign of + * (x - lb) * (ub - x) compared with G0: branch to L on a non-negative product + * when inrng is true, on a negative product when inrng is false. lb and ub + * must satisfy is_simm13 (asserted). t1 and t2 are clobbered. + */ + void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L) + { + assert(Assembler::is_simm13(lb), "Small ints only!"); + assert(Assembler::is_simm13(ub), "Small ints only!"); + // Compute (x - lb) * (ub - x) >= 0 + // NOTE: With the local use of this routine, we rely on small integers to + // guarantee that we do not overflow in the multiplication. + __ add(G0, ub, t2); + __ sub(x, lb, t1); + __ sub(t2, x, t2); + __ mulx(t1, t2, t1); + Assembler::Condition cf = (inrng ? 
Assembler::greaterEqual : Assembler::less); + __ cmp_and_br_short(t1, G0, cf, Assembler::pt, L); + } + /* Table entry: ldd from base + offs into the FP register dest, then advance + * offs by 8. Emitted as a load/increment pair; the dispatch comment in + * gen_mult_mpmul counts two instructions per entry. + */ + void ldd_entry(Register base, Register offs, FloatRegister dest) + { + __ ldd(base, offs, dest); + __ inc(offs, 8); + } + /* Table entry: ldx from base + offs into the integer register dest, then + * advance offs by 8 (same load/increment shape as ldd_entry). + */ + void ldx_entry(Register base, Register offs, Register dest) + { + __ ldx(base, offs, dest); + __ inc(offs, 8); + } + /* Emit mpmul(m) followed by a cbcond that compares G0 with G0 for equality, + * i.e. a compare-and-branch to next whose condition always holds. + */ + void mpmul_entry(int m, Label &next) + { + __ mpmul(m); + __ cbcond(Assembler::equal, Assembler::icc, G0, G0, next); + } + /* Bind L, then store r1 and r2 with stx at base + offs, advancing offs by 8 + * after each store (16 in total). + */ + void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs) + { + __ bind(L); + __ stx(r1, base, offs); + __ inc(offs, 8); + __ stx(r2, base, offs); + __ inc(offs, 8); + } + /* Emit, via emit_data, the difference Lbl0.loc_pos() - Lbl1.loc_pos(). Both + * labels must already be bound (asserted). + */ + void offs_entry(Label &Lbl0, Label &Lbl1) + { + assert(Lbl0.is_bound(), "must be"); + assert(Lbl1.is_bound(), "must be"); + + int offset = Lbl0.loc_pos() - Lbl1.loc_pos(); + + __ emit_data(offset); + } + + /* Generate the actual multiplication kernels for BigInteger vectors: + * + * 1. gen_mult_mpmul(...) + * + * 2. gen_mult_64x64(...) + * + * 3. gen_mult_64x64_unaligned(...) + * + * 4. gen_mult_32x32(...) + */ + void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr, + Label &L_exit) + { + const Register zero = G0; + const Register gxp = G1; // Need to use global registers across RWs. + const Register gyp = G2; + const Register gzp = G3; + const Register disp = G4; + const Register offs = G5; + + __ mov(xptr, gxp); + __ mov(yptr, gyp); + __ mov(zptr, gzp); + + /* Compute jump vector entry: + * + * 1. mpmul input size (0..31) x 64b + * 2. vector input size in 32b limbs (even number) + * 3. branch entries in reverse order (31..0), using two + * instructions per entry (2 * 4 bytes). + * + * displacement = byte_offset(bra_offset(len)) + * = byte_offset((64 - len)/2) + * = 8 * (64 - len)/2 + * = 4 * (64 - len) + */ + Register temp = I5; // Alright to use input regs. in first batch. 
+ + __ sub(zero, len, temp); + __ add(temp, 64, temp); + __ sllx(temp, 2, disp); // disp := (64 - len) << 2 + + // Dispatch relative current PC, into instruction table below. + __ rdpc(temp); + __ add(temp, 16, temp); + __ jmp(temp, disp); + __ delayed()->clr(offs); + + ldd_entry(gxp, offs, F22); + ldd_entry(gxp, offs, F20); + ldd_entry(gxp, offs, F18); + ldd_entry(gxp, offs, F16); + ldd_entry(gxp, offs, F14); + ldd_entry(gxp, offs, F12); + ldd_entry(gxp, offs, F10); + ldd_entry(gxp, offs, F8); + ldd_entry(gxp, offs, F6); + ldd_entry(gxp, offs, F4); + ldx_entry(gxp, offs, I5); + ldx_entry(gxp, offs, I4); + ldx_entry(gxp, offs, I3); + ldx_entry(gxp, offs, I2); + ldx_entry(gxp, offs, I1); + ldx_entry(gxp, offs, I0); + ldx_entry(gxp, offs, L7); + ldx_entry(gxp, offs, L6); + ldx_entry(gxp, offs, L5); + ldx_entry(gxp, offs, L4); + ldx_entry(gxp, offs, L3); + ldx_entry(gxp, offs, L2); + ldx_entry(gxp, offs, L1); + ldx_entry(gxp, offs, L0); + ldd_entry(gxp, offs, F2); + ldd_entry(gxp, offs, F0); + ldx_entry(gxp, offs, O5); + ldx_entry(gxp, offs, O4); + ldx_entry(gxp, offs, O3); + ldx_entry(gxp, offs, O2); + ldx_entry(gxp, offs, O1); + ldx_entry(gxp, offs, O0); + + __ save(SP, -176, SP); + + const Register addr = gxp; // Alright to reuse 'gxp'. + + // Dispatch relative current PC, into instruction table below. 
+ __ rdpc(addr); + __ add(addr, 16, addr); + __ jmp(addr, disp); + __ delayed()->clr(offs); + + ldd_entry(gyp, offs, F58); + ldd_entry(gyp, offs, F56); + ldd_entry(gyp, offs, F54); + ldd_entry(gyp, offs, F52); + ldd_entry(gyp, offs, F50); + ldd_entry(gyp, offs, F48); + ldd_entry(gyp, offs, F46); + ldd_entry(gyp, offs, F44); + ldd_entry(gyp, offs, F42); + ldd_entry(gyp, offs, F40); + ldd_entry(gyp, offs, F38); + ldd_entry(gyp, offs, F36); + ldd_entry(gyp, offs, F34); + ldd_entry(gyp, offs, F32); + ldd_entry(gyp, offs, F30); + ldd_entry(gyp, offs, F28); + ldd_entry(gyp, offs, F26); + ldd_entry(gyp, offs, F24); + ldx_entry(gyp, offs, O5); + ldx_entry(gyp, offs, O4); + ldx_entry(gyp, offs, O3); + ldx_entry(gyp, offs, O2); + ldx_entry(gyp, offs, O1); + ldx_entry(gyp, offs, O0); + ldx_entry(gyp, offs, L7); + ldx_entry(gyp, offs, L6); + ldx_entry(gyp, offs, L5); + ldx_entry(gyp, offs, L4); + ldx_entry(gyp, offs, L3); + ldx_entry(gyp, offs, L2); + ldx_entry(gyp, offs, L1); + ldx_entry(gyp, offs, L0); + + __ save(SP, -176, SP); + __ save(SP, -176, SP); + __ save(SP, -176, SP); + __ save(SP, -176, SP); + __ save(SP, -176, SP); + + Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2; + Label L_mpmul_restore_1, L_mpmul_restore_0; + + // Dispatch relative current PC, into instruction table below. 
+ __ rdpc(addr); + __ add(addr, 16, addr); + __ jmp(addr, disp); + __ delayed()->clr(offs); + + mpmul_entry(31, L_mpmul_restore_0); + mpmul_entry(30, L_mpmul_restore_0); + mpmul_entry(29, L_mpmul_restore_0); + mpmul_entry(28, L_mpmul_restore_0); + mpmul_entry(27, L_mpmul_restore_1); + mpmul_entry(26, L_mpmul_restore_1); + mpmul_entry(25, L_mpmul_restore_1); + mpmul_entry(24, L_mpmul_restore_1); + mpmul_entry(23, L_mpmul_restore_1); + mpmul_entry(22, L_mpmul_restore_1); + mpmul_entry(21, L_mpmul_restore_1); + mpmul_entry(20, L_mpmul_restore_2); + mpmul_entry(19, L_mpmul_restore_2); + mpmul_entry(18, L_mpmul_restore_2); + mpmul_entry(17, L_mpmul_restore_2); + mpmul_entry(16, L_mpmul_restore_2); + mpmul_entry(15, L_mpmul_restore_2); + mpmul_entry(14, L_mpmul_restore_2); + mpmul_entry(13, L_mpmul_restore_3); + mpmul_entry(12, L_mpmul_restore_3); + mpmul_entry(11, L_mpmul_restore_3); + mpmul_entry(10, L_mpmul_restore_3); + mpmul_entry( 9, L_mpmul_restore_3); + mpmul_entry( 8, L_mpmul_restore_3); + mpmul_entry( 7, L_mpmul_restore_3); + mpmul_entry( 6, L_mpmul_restore_4); + mpmul_entry( 5, L_mpmul_restore_4); + mpmul_entry( 4, L_mpmul_restore_4); + mpmul_entry( 3, L_mpmul_restore_4); + mpmul_entry( 2, L_mpmul_restore_4); + mpmul_entry( 1, L_mpmul_restore_4); + mpmul_entry( 0, L_mpmul_restore_4); + + Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24; + Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16; + Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08; + Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00; + + Label L_zst_base; // Store sequence base address. 
+ __ bind(L_zst_base); + + stx_entry(L_z31, L7, L6, gzp, offs); + stx_entry(L_z30, L5, L4, gzp, offs); + stx_entry(L_z29, L3, L2, gzp, offs); + stx_entry(L_z28, L1, L0, gzp, offs); + __ restore(); + stx_entry(L_z27, O5, O4, gzp, offs); + stx_entry(L_z26, O3, O2, gzp, offs); + stx_entry(L_z25, O1, O0, gzp, offs); + stx_entry(L_z24, L7, L6, gzp, offs); + stx_entry(L_z23, L5, L4, gzp, offs); + stx_entry(L_z22, L3, L2, gzp, offs); + stx_entry(L_z21, L1, L0, gzp, offs); + __ restore(); + stx_entry(L_z20, O5, O4, gzp, offs); + stx_entry(L_z19, O3, O2, gzp, offs); + stx_entry(L_z18, O1, O0, gzp, offs); + stx_entry(L_z17, L7, L6, gzp, offs); + stx_entry(L_z16, L5, L4, gzp, offs); + stx_entry(L_z15, L3, L2, gzp, offs); + stx_entry(L_z14, L1, L0, gzp, offs); + __ restore(); + stx_entry(L_z13, O5, O4, gzp, offs); + stx_entry(L_z12, O3, O2, gzp, offs); + stx_entry(L_z11, O1, O0, gzp, offs); + stx_entry(L_z10, L7, L6, gzp, offs); + stx_entry(L_z09, L5, L4, gzp, offs); + stx_entry(L_z08, L3, L2, gzp, offs); + stx_entry(L_z07, L1, L0, gzp, offs); + __ restore(); + stx_entry(L_z06, O5, O4, gzp, offs); + stx_entry(L_z05, O3, O2, gzp, offs); + stx_entry(L_z04, O1, O0, gzp, offs); + stx_entry(L_z03, L7, L6, gzp, offs); + stx_entry(L_z02, L5, L4, gzp, offs); + stx_entry(L_z01, L3, L2, gzp, offs); + stx_entry(L_z00, L1, L0, gzp, offs); + + __ restore(); + __ restore(); + // Exit out of 'mpmul' routine, back to multiplyToLen. 
+ __ ba_short(L_exit); + + Label L_zst_offs; + __ bind(L_zst_offs); + + offs_entry(L_z31, L_zst_base); // index 31: 2048x2048 + offs_entry(L_z30, L_zst_base); + offs_entry(L_z29, L_zst_base); + offs_entry(L_z28, L_zst_base); + offs_entry(L_z27, L_zst_base); + offs_entry(L_z26, L_zst_base); + offs_entry(L_z25, L_zst_base); + offs_entry(L_z24, L_zst_base); + offs_entry(L_z23, L_zst_base); + offs_entry(L_z22, L_zst_base); + offs_entry(L_z21, L_zst_base); + offs_entry(L_z20, L_zst_base); + offs_entry(L_z19, L_zst_base); + offs_entry(L_z18, L_zst_base); + offs_entry(L_z17, L_zst_base); + offs_entry(L_z16, L_zst_base); + offs_entry(L_z15, L_zst_base); + offs_entry(L_z14, L_zst_base); + offs_entry(L_z13, L_zst_base); + offs_entry(L_z12, L_zst_base); + offs_entry(L_z11, L_zst_base); + offs_entry(L_z10, L_zst_base); + offs_entry(L_z09, L_zst_base); + offs_entry(L_z08, L_zst_base); + offs_entry(L_z07, L_zst_base); + offs_entry(L_z06, L_zst_base); + offs_entry(L_z05, L_zst_base); + offs_entry(L_z04, L_zst_base); + offs_entry(L_z03, L_zst_base); + offs_entry(L_z02, L_zst_base); + offs_entry(L_z01, L_zst_base); + offs_entry(L_z00, L_zst_base); // index 0: 64x64 + + __ bind(L_mpmul_restore_4); + __ restore(); + __ bind(L_mpmul_restore_3); + __ restore(); + __ bind(L_mpmul_restore_2); + __ restore(); + __ bind(L_mpmul_restore_1); + __ restore(); + __ bind(L_mpmul_restore_0); + + // Dispatch via offset vector entry, into z-store sequence. + Label L_zst_rdpc; + __ bind(L_zst_rdpc); + + assert(L_zst_base.is_bound(), "must be"); + assert(L_zst_offs.is_bound(), "must be"); + assert(L_zst_rdpc.is_bound(), "must be"); + + int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos(); + int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos(); + + temp = gyp; // Alright to reuse 'gyp'. 
+ + __ rdpc(addr); + __ sub(addr, doffs, temp); + __ srlx(disp, 1, disp); + __ lduw(temp, disp, offs); + __ sub(addr, dbase, temp); + __ jmp(temp, offs); + __ delayed()->clr(offs); + } + + void gen_mult_64x64(Register xp, Register xn, + Register yp, Register yn, + Register zp, Register zn, Label &L_exit) + { /* Emits a long multiplication of the x-vector by the y-vector into the z-vector using 64-bit loads/stores (ldx/stx); xn, yn and zn are halved and decremented on entry and are clobbered. */ + // Assuming that a stack frame has already been created, i.e. local and + // output registers are available for immediate use. + + const Register ri = L0; // Inner loop index, xv[i] + const Register rj = L1; // Outer loop index, yv[j] + const Register rk = L2; // Output loop index, zv[k] + const Register rx = L4; // x-vector datum [i] + const Register ry = L5; // y-vector datum [j] + const Register rz = L6; // z-vector datum [k] + const Register rc = L7; // carry over (to z-vector datum [k-1]) + + const Register lop = O0; // lo-64b product + const Register hip = O1; // hi-64b product + + const Register zero = G0; + + Label L_loop_i, L_exit_loop_i; + Label L_loop_j; + Label L_loop_i2, L_exit_loop_i2; + + __ srlx(xn, 1, xn); // index for u32 to u64 ditto + __ srlx(yn, 1, yn); // index for u32 to u64 ditto + __ srlx(zn, 1, zn); // index for u32 to u64 ditto + __ dec(xn); // Adjust [0..(N/2)-1] + __ dec(yn); // ditto for yn + __ dec(zn); // ditto for zn + __ clr(rc); // u64 c = 0 + __ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn) + __ sllx(yn, 3, rj); // int j = yn (byte offset j = 8*yn) + __ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn) + __ ldx(yp, rj, ry); // u64 y = yp[yn] + + // First pass: z[k] = x[i] * y[yn] + carry, for (int i = xn; i >= 0; i--) + __ bind(L_loop_i); + + __ cmp_and_br_short(ri, 0, // i >= 0 + Assembler::less, Assembler::pn, L_exit_loop_i); + __ ldx(xp, ri, rx); // x = xp[i] + __ mulx(rx, ry, lop); // lo-64b-part of result 64x64 + __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64 + __ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry) + __ addxc(hip, zero, rc); // carry over to next datum [k-1] + __ stx(lop, zp, rk); // z[k] = lop + __ dec(rk, 8); // k-- + __ dec(ri, 8); 
// i-- + __ ba_short(L_loop_i); + + __ bind(L_exit_loop_i); + __ stx(rc, zp, rk); // z[k] = c + + // for (int j = yn - 1; j >= 0; j--) + __ sllx(yn, 3, rj); // int j = yn - 1 (byte offset j = 8*yn) + __ dec(rj, 8); + + __ bind(L_loop_j); + + __ cmp_and_br_short(rj, 0, // j >= 0 + Assembler::less, Assembler::pn, L_exit); + __ clr(rc); // u64 c = 0 + __ ldx(yp, rj, ry); // u64 y = yp[j] + + // for (int i = xn, k = --zn; i >= 0; i--) + __ dec(zn); // --zn + __ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn) + __ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn) + + __ bind(L_loop_i2); + + __ cmp_and_br_short(ri, 0, // i >= 0 + Assembler::less, Assembler::pn, L_exit_loop_i2); + __ ldx(xp, ri, rx); // x = xp[i] + __ ldx(zp, rk, rz); // z = zp[k], accumulator + __ mulx(rx, ry, lop); // lo-64b-part of result 64x64 + __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64 + __ addcc(rz, rc, rz); // Accumulate lower order bits, + __ addxc(hip, zero, rc); // Accumulate higher order bits to carry + __ addcc(rz, lop, rz); // z += lo(p) + c + __ addxc(rc, zero, rc); + __ stx(rz, zp, rk); // zp[k] = z + __ dec(rk, 8); // k-- + __ dec(ri, 8); // i-- + __ ba_short(L_loop_i2); + + __ bind(L_exit_loop_i2); + __ stx(rc, zp, rk); // z[k] = c + __ dec(rj, 8); // j-- + __ ba_short(L_loop_j); + } + + void gen_mult_64x64_unaligned(Register xp, Register xn, + Register yp, Register yn, + Register zp, Register zn, Label &L_exit) + { + // Assuming that a stack frame has already been created, i.e. local and + // output registers are available for use. 
+ + const Register xpc = L0; // Inner loop cursor, xp[i] + const Register ypc = L1; // Outer loop cursor, yp[j] + const Register zpc = L2; // Output loop cursor, zp[k] + const Register rx = L4; // x-vector datum [i] + const Register ry = L5; // y-vector datum [j] + const Register rz = L6; // z-vector datum [k] + const Register rc = L7; // carry over (to z-vector datum [k-1]) + const Register rt = O2; // scratch for the 32-bit half at offset 0 of each split 64-bit load/store + + const Register lop = O0; // lo-64b product + const Register hip = O1; // hi-64b product + + const Register zero = G0; + + Label L_loop_i, L_exit_loop_i; + Label L_loop_j; + Label L_loop_i2, L_exit_loop_i2; + + __ srlx(xn, 1, xn); // index for u32 to u64 ditto + __ srlx(yn, 1, yn); // index for u32 to u64 ditto + __ srlx(zn, 1, zn); // index for u32 to u64 ditto + __ dec(xn); // Adjust [0..(N/2)-1] + __ dec(yn); // ditto for yn + __ dec(zn); // ditto for zn + __ clr(rc); // u64 c = 0 + __ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn) + __ add(xp, xpc, xpc); + __ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn) + __ add(yp, ypc, ypc); + __ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn) + __ add(zp, zpc, zpc); + __ lduw(ypc, 0, rt); // u64 y = yp[yn] + __ lduw(ypc, 4, ry); // ... + __ sllx(rt, 32, rt); // word from offset 0 becomes bits 63..32 + __ or3(rt, ry, ry); // y = (rt << 32) | word from offset 4 + + // First pass: z[k] = x[i] * y[yn] + carry, for (int i = xn; i >= 0; i--) + __ bind(L_loop_i); + + __ cmp_and_brx_short(xpc, xp,// i >= 0 + Assembler::lessUnsigned, Assembler::pn, L_exit_loop_i); + __ lduw(xpc, 0, rt); // u64 x = xp[i] + __ lduw(xpc, 4, rx); // ... + __ sllx(rt, 32, rt); // word from offset 0 becomes bits 63..32 + __ or3(rt, rx, rx); // x = (rt << 32) | word from offset 4 + __ mulx(rx, ry, lop); // lo-64b-part of result 64x64 + __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64 + __ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry) + __ addxc(hip, zero, rc); // carry over to next datum [k-1] + __ srlx(lop, 32, rt); // rt = upper 32 bits of lop + __ stw(rt, zpc, 0); // z[k] = lop + __ stw(lop, zpc, 4); // ... 
+ __ dec(zpc, 8); // k-- (zpc--) + __ dec(xpc, 8); // i-- (xpc--) + __ ba_short(L_loop_i); + + __ bind(L_exit_loop_i); + __ srlx(rc, 32, rt); + __ stw(rt, zpc, 0); // z[k] = c + __ stw(rc, zpc, 4); + + // for (int j = yn - 1; j >= 0; j--) + __ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn) + __ add(yp, ypc, ypc); + __ dec(ypc, 8); // yn - 1 (ypc--) + + __ bind(L_loop_j); + + __ cmp_and_brx_short(ypc, yp,// j >= 0 + Assembler::lessUnsigned, Assembler::pn, L_exit); + __ clr(rc); // u64 c = 0 + __ lduw(ypc, 0, rt); // u64 y = yp[j] (= *ypc) + __ lduw(ypc, 4, ry); // ... + __ sllx(rt, 32, rt); + __ or3(rt, ry, ry); + + // for (int i = xn, k = --zn; i >= 0; i--) + __ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn) + __ add(xp, xpc, xpc); + __ dec(zn); // --zn + __ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn) + __ add(zp, zpc, zpc); + + __ bind(L_loop_i2); + + __ cmp_and_brx_short(xpc, xp,// i >= 0 + Assembler::lessUnsigned, Assembler::pn, L_exit_loop_i2); + __ lduw(xpc, 0, rt); // u64 x = xp[i] (= *xpc) + __ lduw(xpc, 4, rx); // ... + __ sllx(rt, 32, rt); + __ or3(rt, rx, rx); + + __ lduw(zpc, 0, rt); // u64 z = zp[k] (= *zpc) + __ lduw(zpc, 4, rz); // ... + __ sllx(rt, 32, rt); + __ or3(rt, rz, rz); + + __ mulx(rx, ry, lop); // lo-64b-part of result 64x64 + __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64 + __ addcc(rz, rc, rz); // Accumulate lower order bits... + __ addxc(hip, zero, rc); // Accumulate higher order bits to carry + __ addcc(rz, lop, rz); // ... 
z += lo(p) + c + __ addxccc(rc, zero, rc); + __ srlx(rz, 32, rt); + __ stw(rt, zpc, 0); // zp[k] = z (*zpc = z) + __ stw(rz, zpc, 4); + __ dec(zpc, 8); // k-- (zpc--) + __ dec(xpc, 8); // i-- (xpc--) + __ ba_short(L_loop_i2); + + __ bind(L_exit_loop_i2); + __ srlx(rc, 32, rt); + __ stw(rt, zpc, 0); // z[k] = c + __ stw(rc, zpc, 4); + __ dec(ypc, 8); // j-- (ypc--) + __ ba_short(L_loop_j); + } + + void gen_mult_32x32(Register xp, Register xn, + Register yp, Register yn, + Register zp, Register zn, Label &L_exit) + { /* Emits a long multiplication of the x-vector by the y-vector into the z-vector using 32-bit loads/stores (lduw/stw), forming each product with one 64-bit mulx; xn, yn and zn are decremented on entry and are clobbered. */ + // Assuming that a stack frame has already been created, i.e. local and + // output registers are available for use. + + const Register ri = L0; // Inner loop index, xv[i] + const Register rj = L1; // Outer loop index, yv[j] + const Register rk = L2; // Output loop index, zv[k] + const Register rx = L4; // x-vector datum [i] + const Register ry = L5; // y-vector datum [j] + const Register rz = L6; // z-vector datum [k] + const Register rc = L7; // carry over (to z-vector datum [k-1]) + + const Register p64 = O0; // 64b product + const Register z65 = O1; // carry+64b accumulator + const Register c65 = O2; // carry at bit 65 + const Register c33 = O2; // carry at bit 33 (after shift); same register as c65 + + const Register zero = G0; + + Label L_loop_i, L_exit_loop_i; + Label L_loop_j; + Label L_loop_i2, L_exit_loop_i2; + + __ dec(xn); // Adjust [0..N-1] + __ dec(yn); // ditto for yn + __ dec(zn); // ditto for zn + __ clr(rc); // u32 c = 0 + __ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn) + __ sllx(yn, 2, rj); // int j = yn (byte offset j = 4*yn) + __ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn) + __ lduw(yp, rj, ry); // u32 y = yp[yn] + + // First pass: z[k] = x[i] * y[yn] + carry, for (int i = xn; i >= 0; i--) + __ bind(L_loop_i); + + __ cmp_and_br_short(ri, 0, // i >= 0 + Assembler::less, Assembler::pn, L_exit_loop_i); + __ lduw(xp, ri, rx); // x = xp[i] + __ mulx(rx, ry, p64); // 64b result of 32x32 + __ addcc(rc, p64, z65); // Accumulate to 65 bits (producing carry) + __ addxc(zero, zero, c65); // Materialise carry (in bit 
65) into lsb, + __ sllx(c65, 32, c33); // and shift into bit 33 + __ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32 + __ add(c33, rc, rc); // carry over to next datum [k-1] + __ stw(z65, zp, rk); // z[k] = lo(z65) + __ dec(rk, 4); // k-- + __ dec(ri, 4); // i-- + __ ba_short(L_loop_i); + + __ bind(L_exit_loop_i); + __ stw(rc, zp, rk); // z[k] = c + + // for (int j = yn - 1; j >= 0; j--) + __ sllx(yn, 2, rj); // int j = yn - 1 (byte offset j = 4*yn) + __ dec(rj, 4); + + __ bind(L_loop_j); + + __ cmp_and_br_short(rj, 0, // j >= 0 + Assembler::less, Assembler::pn, L_exit); + __ clr(rc); // u32 c = 0 + __ lduw(yp, rj, ry); // u32 y = yp[j] + + // for (int i = xn, k = --zn; i >= 0; i--) + __ dec(zn); // --zn + __ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn) + __ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn) + + __ bind(L_loop_i2); + + __ cmp_and_br_short(ri, 0, // i >= 0 + Assembler::less, Assembler::pn, L_exit_loop_i2); + __ lduw(xp, ri, rx); // x = xp[i] + __ lduw(zp, rk, rz); // z = zp[k], accumulator + __ mulx(rx, ry, p64); // 64b result of 32x32 + __ add(rz, rc, rz); // Accumulate lower order bits, + __ addcc(rz, p64, z65); // z += lo(p64) + c + __ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb, + __ sllx(c65, 32, c33); // and shift into bit 33 + __ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32 + __ add(c33, rc, rc); // carry over to next datum [k-1] + __ stw(z65, zp, rk); // zp[k] = lo(z65) + __ dec(rk, 4); // k-- + __ dec(ri, 4); // i-- + __ ba_short(L_loop_i2); + + __ bind(L_exit_loop_i2); + __ stw(rc, zp, rk); // z[k] = c + __ dec(rj, 4); // j-- + __ ba_short(L_loop_j); + } + + + void generate_initial() { + // Generates all stubs and initializes the entry points + + //------------------------------------------------------------------------------------------------------------------------ + // entry points that exist in all platforms + // Note: This is code that could be shared among different platforms - however the 
benefit seems to be smaller than + // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + //------------------------------------------------------------------------------------------------------------------------ + // entry points that are platform specific + StubRoutines::Sparc::_test_stop_entry = generate_test_stop(); + + StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine(); + StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows(); + + // Build this early so it's available for the interpreter. + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); + StubRoutines::_throw_delayed_StackOverflowError_entry = + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError)); + + if (UseCRC32Intrinsics) { + // set table address before stub generation which use it + StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); + } + + if (UseCRC32CIntrinsics) { + // set table address before stub generation which use it + StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table; + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); + } + } + + + void generate_all() { + // Generates all stubs and initializes the entry points + + // Generate partial_subtype_check first here since its code depends on + // UseZeroBaseCompressedOops which is defined after heap initialization. 
+ StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check(); + // These entry points require SharedInfo::stack0 to be set up in non-core builds + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); + StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); + StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine(); + + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); + + // Don't initialize the platform math functions since sparc + // doesn't have intrinsics for these operations. 
+ + // generate AES intrinsics code + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); + } + // generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + + // generate SHA1/SHA256/SHA512 intrinsics code + if (UseSHA1Intrinsics) { + StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); + StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); + } + if (UseSHA256Intrinsics) { + StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); + StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); + } + if (UseSHA512Intrinsics) { + StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); + StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); + } + // generate Adler32 intrinsics code + if (UseAdler32Intrinsics) { + StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32(); + } + +#ifdef COMPILER2 + // Intrinsics supported by C2 only: + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } +#endif // COMPILER2 + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + _stub_count = !all ? 
0x100 : 0x200; + if (all) { + generate_all(); + } else { + generate_initial(); + } + + // make sure this stub is available for all local calls + if (_atomic_add_stub.is_unbound()) { + // generate a second time, if necessary + (void) generate_atomic_add(); + } + } + + + private: + int _stub_count; + void stub_prolog(StubCodeDesc* cdesc) { + # ifdef ASSERT + // put extra information in the stub code, to make it more readable + // Write the high part of the address + // [RGV] Check if there is a dependency on the size of this prolog + __ emit_data((intptr_t)cdesc >> 32, relocInfo::none); + __ emit_data((intptr_t)cdesc, relocInfo::none); + __ emit_data(++_stub_count, relocInfo::none); + # endif + align(true); + } + + void align(bool at_header = false) { + // %%%%% move this constant somewhere else + // UltraSPARC cache line size is 8 instructions: + const unsigned int icache_line_size = 32; + const unsigned int icache_half_line_size = 16; + + if (at_header) { + while ((intptr_t)(__ pc()) % icache_line_size != 0) { + __ emit_data(0, relocInfo::none); + } + } else { + while ((intptr_t)(__ pc()) % icache_half_line_size != 0) { + __ nop(); + } + } + } + +}; // end class declaration + +#define UCM_TABLE_MAX_ENTRIES 8 +void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } + StubGenerator g(code, all); +} diff -ur --new-file a/src/hotspot/cpu/sparc/stubRoutines_sparc.cpp b/src/hotspot/cpu/sparc/stubRoutines_sparc.cpp --- a/src/hotspot/cpu/sparc/stubRoutines_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/stubRoutines_sparc.cpp 2023-04-16 11:42:11.076867727 +0000 @@ -0,0 +1,208 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. + + +extern "C" { + address _flush_reg_windows(); // in .s file. + // Flush registers to stack. In case of error we will need to stack walk. + address bootstrap_flush_windows(void) { + Thread* thread = Thread::current_or_null(); + // Very early in process there is no thread. 
+ if (thread != NULL) { + guarantee(thread->is_Java_thread(), "Not a Java thread."); + JavaThread* jt = (JavaThread*)thread; + guarantee(!jt->has_last_Java_frame(), "Must be able to flush registers!"); + } + return (address)_flush_reg_windows(); + }; +}; + +address StubRoutines::Sparc::_test_stop_entry = NULL; +address StubRoutines::Sparc::_stop_subroutine_entry = NULL; +address StubRoutines::Sparc::_flush_callers_register_windows_entry = CAST_FROM_FN_PTR(address, bootstrap_flush_windows); + +address StubRoutines::Sparc::_partial_subtype_check = NULL; + +uint64_t StubRoutines::Sparc::_crc_by128_masks[] = +{ + /* The fields in this structure are arranged so that they can be + * picked up two at a time with 128-bit loads. + * + * Because of the flipped bit order for this CRC polynomial, + * the constant for X**N is left-shifted by 1. This is because + * a 64 x 64 polynomial multiply produces a 127-bit result + * but the highest term is always aligned to bit 0 in the container. + * Pre-shifting by one fixes this, at the cost of potentially making + * the 32-bit constant no longer fit in a 32-bit container (thus the + * use of uint64_t, though this is also the size used by the carry- + * less multiply instruction). + * + * In addition, the flipped bit order and highest-term-at-least-bit + * multiply change the constants used. The 96-bit result will be + * aligned to the high-term end of the target 128-bit container, + * not the low-term end; that is, instead of a 512-bit or 576-bit fold, + * it is a 480 (=512-32) or 544 (=512+64-32) bit fold. + * + * This causes additional problems in the 128-to-64-bit reduction; see the + * code for details. By storing a mask in the otherwise unused half of + * a 128-bit constant, bits can be cleared before multiplication without + * storing and reloading. Note that staying on a 128-bit datapath means + * that some data is uselessly stored and some unused data is intersected + * with an irrelevant constant. 
+ */ + + ((uint64_t) 0xffffffffUL), /* low of K_M_64 */ + ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64 */ + ((uint64_t) 0xba8ccbe8U << 1), /* low of K_160_96 */ + ((uint64_t) 0x6655004fU << 1), /* high of K_160_96 */ + ((uint64_t) 0xaa2215eaU << 1), /* low of K_544_480 */ + ((uint64_t) 0xe3720acbU << 1) /* high of K_544_480 */ +}; + +/** + * crc_table[] from jdk/src/java.base/share/native/libzip/zlib-1.2.8/crc32.h + */ +juint StubRoutines::Sparc::_crc_table[] = +{ + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 
0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 
0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +}; + +/** + * CRC32C constants lookup table + */ +juint StubRoutines::Sparc::_crc32c_table[] = +{ + 0x00000000UL, 0xF26B8303UL, 0xE13B70F7UL, 0x1350F3F4UL, 0xC79A971FUL, + 0x35F1141CUL, 0x26A1E7E8UL, 0xD4CA64EBUL, 0x8AD958CFUL, 0x78B2DBCCUL, + 0x6BE22838UL, 0x9989AB3BUL, 0x4D43CFD0UL, 0xBF284CD3UL, 0xAC78BF27UL, + 0x5E133C24UL, 0x105EC76FUL, 0xE235446CUL, 0xF165B798UL, 0x030E349BUL, + 0xD7C45070UL, 0x25AFD373UL, 0x36FF2087UL, 0xC494A384UL, 0x9A879FA0UL, + 0x68EC1CA3UL, 0x7BBCEF57UL, 0x89D76C54UL, 0x5D1D08BFUL, 0xAF768BBCUL, + 0xBC267848UL, 0x4E4DFB4BUL, 0x20BD8EDEUL, 0xD2D60DDDUL, 0xC186FE29UL, + 0x33ED7D2AUL, 0xE72719C1UL, 0x154C9AC2UL, 0x061C6936UL, 0xF477EA35UL, + 0xAA64D611UL, 0x580F5512UL, 0x4B5FA6E6UL, 0xB93425E5UL, 0x6DFE410EUL, + 0x9F95C20DUL, 0x8CC531F9UL, 0x7EAEB2FAUL, 0x30E349B1UL, 0xC288CAB2UL, + 0xD1D83946UL, 0x23B3BA45UL, 0xF779DEAEUL, 0x05125DADUL, 0x1642AE59UL, + 0xE4292D5AUL, 0xBA3A117EUL, 0x4851927DUL, 0x5B016189UL, 0xA96AE28AUL, + 0x7DA08661UL, 0x8FCB0562UL, 0x9C9BF696UL, 0x6EF07595UL, 0x417B1DBCUL, + 0xB3109EBFUL, 0xA0406D4BUL, 0x522BEE48UL, 0x86E18AA3UL, 0x748A09A0UL, + 0x67DAFA54UL, 0x95B17957UL, 0xCBA24573UL, 0x39C9C670UL, 0x2A993584UL, + 0xD8F2B687UL, 0x0C38D26CUL, 0xFE53516FUL, 0xED03A29BUL, 0x1F682198UL, + 0x5125DAD3UL, 0xA34E59D0UL, 0xB01EAA24UL, 0x42752927UL, 0x96BF4DCCUL, + 0x64D4CECFUL, 0x77843D3BUL, 0x85EFBE38UL, 0xDBFC821CUL, 0x2997011FUL, + 0x3AC7F2EBUL, 0xC8AC71E8UL, 0x1C661503UL, 0xEE0D9600UL, 0xFD5D65F4UL, + 0x0F36E6F7UL, 0x61C69362UL, 0x93AD1061UL, 0x80FDE395UL, 0x72966096UL, + 0xA65C047DUL, 0x5437877EUL, 0x4767748AUL, 0xB50CF789UL, 0xEB1FCBADUL, + 0x197448AEUL, 0x0A24BB5AUL, 0xF84F3859UL, 0x2C855CB2UL, 0xDEEEDFB1UL, + 0xCDBE2C45UL, 0x3FD5AF46UL, 0x7198540DUL, 0x83F3D70EUL, 0x90A324FAUL, + 0x62C8A7F9UL, 0xB602C312UL, 0x44694011UL, 0x5739B3E5UL, 0xA55230E6UL, 
+ 0xFB410CC2UL, 0x092A8FC1UL, 0x1A7A7C35UL, 0xE811FF36UL, 0x3CDB9BDDUL, + 0xCEB018DEUL, 0xDDE0EB2AUL, 0x2F8B6829UL, 0x82F63B78UL, 0x709DB87BUL, + 0x63CD4B8FUL, 0x91A6C88CUL, 0x456CAC67UL, 0xB7072F64UL, 0xA457DC90UL, + 0x563C5F93UL, 0x082F63B7UL, 0xFA44E0B4UL, 0xE9141340UL, 0x1B7F9043UL, + 0xCFB5F4A8UL, 0x3DDE77ABUL, 0x2E8E845FUL, 0xDCE5075CUL, 0x92A8FC17UL, + 0x60C37F14UL, 0x73938CE0UL, 0x81F80FE3UL, 0x55326B08UL, 0xA759E80BUL, + 0xB4091BFFUL, 0x466298FCUL, 0x1871A4D8UL, 0xEA1A27DBUL, 0xF94AD42FUL, + 0x0B21572CUL, 0xDFEB33C7UL, 0x2D80B0C4UL, 0x3ED04330UL, 0xCCBBC033UL, + 0xA24BB5A6UL, 0x502036A5UL, 0x4370C551UL, 0xB11B4652UL, 0x65D122B9UL, + 0x97BAA1BAUL, 0x84EA524EUL, 0x7681D14DUL, 0x2892ED69UL, 0xDAF96E6AUL, + 0xC9A99D9EUL, 0x3BC21E9DUL, 0xEF087A76UL, 0x1D63F975UL, 0x0E330A81UL, + 0xFC588982UL, 0xB21572C9UL, 0x407EF1CAUL, 0x532E023EUL, 0xA145813DUL, + 0x758FE5D6UL, 0x87E466D5UL, 0x94B49521UL, 0x66DF1622UL, 0x38CC2A06UL, + 0xCAA7A905UL, 0xD9F75AF1UL, 0x2B9CD9F2UL, 0xFF56BD19UL, 0x0D3D3E1AUL, + 0x1E6DCDEEUL, 0xEC064EEDUL, 0xC38D26C4UL, 0x31E6A5C7UL, 0x22B65633UL, + 0xD0DDD530UL, 0x0417B1DBUL, 0xF67C32D8UL, 0xE52CC12CUL, 0x1747422FUL, + 0x49547E0BUL, 0xBB3FFD08UL, 0xA86F0EFCUL, 0x5A048DFFUL, 0x8ECEE914UL, + 0x7CA56A17UL, 0x6FF599E3UL, 0x9D9E1AE0UL, 0xD3D3E1ABUL, 0x21B862A8UL, + 0x32E8915CUL, 0xC083125FUL, 0x144976B4UL, 0xE622F5B7UL, 0xF5720643UL, + 0x07198540UL, 0x590AB964UL, 0xAB613A67UL, 0xB831C993UL, 0x4A5A4A90UL, + 0x9E902E7BUL, 0x6CFBAD78UL, 0x7FAB5E8CUL, 0x8DC0DD8FUL, 0xE330A81AUL, + 0x115B2B19UL, 0x020BD8EDUL, 0xF0605BEEUL, 0x24AA3F05UL, 0xD6C1BC06UL, + 0xC5914FF2UL, 0x37FACCF1UL, 0x69E9F0D5UL, 0x9B8273D6UL, 0x88D28022UL, + 0x7AB90321UL, 0xAE7367CAUL, 0x5C18E4C9UL, 0x4F48173DUL, 0xBD23943EUL, + 0xF36E6F75UL, 0x0105EC76UL, 0x12551F82UL, 0xE03E9C81UL, 0x34F4F86AUL, + 0xC69F7B69UL, 0xD5CF889DUL, 0x27A40B9EUL, 0x79B737BAUL, 0x8BDCB4B9UL, + 0x988C474DUL, 0x6AE7C44EUL, 0xBE2DA0A5UL, 0x4C4623A6UL, 0x5F16D052UL, + 0xAD7D5351UL +}; diff -ur --new-file 
a/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp b/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp --- a/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp 2023-04-16 11:42:11.077001864 +0000 @@ -0,0 +1,76 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_STUBROUTINES_SPARC_HPP +#define CPU_SPARC_STUBROUTINES_SPARC_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + + +// So unfortunately c2 will call with a pc from a frame object +// (already adjusted) and a raw pc (unadjusted), so we need to check both. +// It didn't use to be like this before adapter removal. 
+static bool returns_to_call_stub(address return_pc) { + return ((return_pc + frame::pc_return_offset) == _call_stub_return_address) || + (return_pc == _call_stub_return_address ); +} + +enum /* platform_dependent_constants */ { + // %%%%%%%% May be able to shrink this a lot + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 29000 // simply increase if too small (assembler will crash if too small) +}; + +class Sparc { + friend class StubGenerator; + + private: + static address _test_stop_entry; + static address _stop_subroutine_entry; + static address _flush_callers_register_windows_entry; + + static address _partial_subtype_check; + // masks and lookup tables for CRC32 (_crc_table) and CRC32C (_crc32c_table) + static uint64_t _crc_by128_masks[]; + static juint _crc_table[]; + static juint _crc32c_table[]; + + public: + // test assembler stop routine by setting registers + static void (*test_stop_entry()) () { return CAST_TO_FN_PTR(void (*)(void), _test_stop_entry); } + + // a subroutine for debugging assembler code; note this returns the address of the variable holding the entry + static address stop_subroutine_entry_address() { return (address)&_stop_subroutine_entry; } + + // flushes (all but current) register window + static intptr_t* (*flush_callers_register_windows_func())() { return CAST_TO_FN_PTR(intptr_t* (*)(void), _flush_callers_register_windows_entry); } + + static address partial_subtype_check() { return _partial_subtype_check; } + + static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } +}; + +#endif // CPU_SPARC_STUBROUTINES_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp b/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp --- a/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/templateInterpreterGenerator_sparc.cpp 2023-04-16 11:42:11.077684727 +0000 @@ -0,0 +1,1919 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include 
"utilities/macros.hpp" + +// Size of interpreter code. Increase if too small. Interpreter will +// fail with a guarantee ("not enough space for interpreter generation"); +// if too small. +// Run with +PrintInterpreter to get the VM to print out the size. +// Max size with JVMTI +// The sethi() instruction generates lots more instructions when shell +// stack limit is unlimited, so that's why this is much bigger. +int TemplateInterpreter::InterpreterCodeSize = 260 * K; + +// Generation of Interpreter +// +// The TemplateInterpreterGenerator generates the interpreter into Interpreter::_code. + + +#define __ _masm-> + + +//---------------------------------------------------------------------------------------------------- + +// LP64 passes floating point arguments in F1, F3, F5, etc. instead of +// O0, O1, O2 etc.. +// Doubles are passed in D0, D2, D4 +// We store the signature of the first 16 arguments in the first argument +// slot because it will be overwritten prior to calling the native +// function, with the pointer to the JNIEnv. +// If LP64 there can be up to 16 floating point arguments in registers +// or 6 integer registers. +address TemplateInterpreterGenerator::generate_slow_signature_handler() { + + enum { + non_float = 0, + float_sig = 1, + double_sig = 2, + sig_mask = 3 + }; + + address entry = __ pc(); + Argument argv(0, true); + + // We are in the jni transition frame. 
Save the last_java_frame corresponding to the + // outer interpreter frame + // + __ set_last_Java_frame(FP, noreg); + // make sure the interpreter frame we've pushed has a valid return pc + __ mov(O7, I7); + __ mov(Lmethod, G3_scratch); + __ mov(Llocals, G4_scratch); + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + __ add(argv.address_in_frame(), O3); + __ mov(G2_thread, O0); + __ mov(G3_scratch, O1); + __ call(CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), relocInfo::runtime_call_type); + __ delayed()->mov(G4_scratch, O2); + __ mov(L7_thread_cache, G2_thread); + __ reset_last_Java_frame(); + + + // load the register arguments (the C code packed them as varargs) + Address Sig = argv.address_in_frame(); // Argument 0 holds the signature + __ ld_ptr( Sig, G3_scratch ); // Get register argument signature word into G3_scratch + __ mov( G3_scratch, G4_scratch); + __ srl( G4_scratch, 2, G4_scratch); // Skip Arg 0 (each argument takes 2 signature bits, see sig_mask) + Label done; + for (Argument ldarg = argv.successor(); ldarg.is_float_register(); ldarg = ldarg.successor()) { + Label NonFloatArg; + Label LoadFloatArg; + Label LoadDoubleArg; + Label NextArg; + Address a = ldarg.address_in_frame(); + __ andcc(G4_scratch, sig_mask, G3_scratch); + __ br(Assembler::zero, false, Assembler::pt, NonFloatArg); + __ delayed()->nop(); + + __ cmp(G3_scratch, float_sig ); + __ br(Assembler::equal, false, Assembler::pt, LoadFloatArg); + __ delayed()->nop(); + + __ cmp(G3_scratch, double_sig ); + __ br(Assembler::equal, false, Assembler::pt, LoadDoubleArg); + __ delayed()->nop(); + + __ bind(NonFloatArg); + // There are only 6 integer register arguments! + if ( ldarg.is_register() ) + __ ld_ptr(ldarg.address_in_frame(), ldarg.as_register()); + else { + // Optimization, see if there are any more args and get out prior to checking + // all 16 float registers. My guess is that this is rare. + // If is_register is false, then we are done with the first six integer args. 
+ __ br_null_short(G4_scratch, Assembler::pt, done); + } + __ ba(NextArg); + __ delayed()->srl( G4_scratch, 2, G4_scratch ); + + __ bind(LoadFloatArg); + __ ldf( FloatRegisterImpl::S, a, ldarg.as_float_register(), 4); + __ ba(NextArg); + __ delayed()->srl( G4_scratch, 2, G4_scratch ); + + __ bind(LoadDoubleArg); + __ ldf( FloatRegisterImpl::D, a, ldarg.as_double_register() ); + __ ba(NextArg); + __ delayed()->srl( G4_scratch, 2, G4_scratch ); + + __ bind(NextArg); + } + + __ bind(done); + __ ret(); + __ delayed()->restore(O0, 0, Lscratch); // caller's Lscratch gets the result handler + + return entry; +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& Lcontinue) { + + // Generate code to initiate compilation on the counter overflow. + + // InterpreterRuntime::frequency_counter_overflow takes two arguments, + // the first indicates if the counter overflow occurs at a backwards branch (NULL bcp) + // and the second is only used when the first is true. We pass zero for both. + // The call returns the address of the verified entry point for the method or NULL + // if the compilation did not complete (either went background or bailed out). + __ set((int)false, O2); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), O2, O2, true); + // returns verified_entry_point or NULL + // we ignore it in any case + __ ba_short(Lcontinue); +} + + +// End of helpers + +// Various method entries + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +// +address TemplateInterpreterGenerator::generate_abstract_entry(void) { + address entry = __ pc(); + // abstract method entry + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), G5_method); + // the call_VM checks for exception, so we should never return here. 
+ __ should_not_reach_here(); + return entry; +} + +void TemplateInterpreterGenerator::save_native_result(void) { + // result potentially in O0/O1: save it across calls + const Address& l_tmp = InterpreterMacroAssembler::l_tmp; + + // result potentially in F0/F1: save it across calls + const Address& d_tmp = InterpreterMacroAssembler::d_tmp; + + // save any potential method result value (restore_native_result() reloads it) around the unlocking operation + __ stf(FloatRegisterImpl::D, F0, d_tmp); + __ stx(O0, l_tmp); +} + +void TemplateInterpreterGenerator::restore_native_result(void) { + const Address& l_tmp = InterpreterMacroAssembler::l_tmp; + const Address& d_tmp = InterpreterMacroAssembler::d_tmp; + + // Restore any method result value stored by save_native_result() + __ ldf(FloatRegisterImpl::D, d_tmp, F0); + __ ldx(l_tmp, O0); +} + +address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // create exception object: name goes in G3_scratch, then either the oop in Otos_i or the message in G4_scratch + __ set((intptr_t)name, G3_scratch); + if (pass_oop) { + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), G3_scratch, Otos_i); + } else { + __ set((intptr_t)message, G4_scratch); + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), G3_scratch, G4_scratch); + } + // throw exception + assert(Interpreter::throw_exception_entry() != NULL, "generate it first"); + AddressLiteral thrower(Interpreter::throw_exception_entry()); + __ jump_to(thrower, G3_scratch); + __ delayed()->nop(); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception + // happened + __ empty_expression_stack(); + // load 
exception object + __ call_VM(Oexception, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ClassCastException), + Otos_i); + __ should_not_reach_here(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // Pass the array to create more detailed exceptions. + // NOTE(review): the earlier note here spoke of shuffling the aberrant index to G4_scratch, but no + // shuffle is emitted: G3_scratch and Otos_i are passed as-is - confirm which holds the array and which the index + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), G3_scratch, Otos_i); + __ should_not_reach_here(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + __ should_not_reach_here(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + if (state == atos) { + __ profile_return_type(O0, G3_scratch, G1_scratch); + } + + // The callee returns with the stack possibly adjusted by adapter transition + // We remove that possible adjustment here. + // All interpreter local registers are untouched. Any result is passed back + // in the O0/O1 or float registers. Before continuing, the arguments must be + // popped from the java expression stack; i.e., Lesp must be adjusted. + + __ mov(Llast_SP, SP); // Remove any adapter added stack space. 
+ + const Register cache = G3_scratch; + const Register index = G1_scratch; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ ld_ptr(cache, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset(), flags); + const Register parameter_size = flags; + __ and3(flags, ConstantPoolCacheEntry::parameter_size_mask, parameter_size); // argument size in words + __ sll(parameter_size, Interpreter::logStackElementSize, parameter_size); // total argument size in bytes + __ add(Lesp, parameter_size, Lesp); // pop arguments + + __ check_and_handle_popframe(Gtemp); + __ check_and_handle_earlyret(Gtemp); + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step, address continuation) { + address entry = __ pc(); + __ get_constant_pool_cache(LcpoolCache); // load LcpoolCache +#if INCLUDE_JVMCI + // Check if we need to take lock at entry of synchronized method. This can + // only occur on method entry so emit it only for vtos with step 0. + if (EnableJVMCI && state == vtos && step == 0) { + Label L; + Address pending_monitor_enter_addr(G2_thread, JavaThread::pending_monitorenter_offset()); + __ ldbool(pending_monitor_enter_addr, Gtemp); // Load the pending-monitor-enter flag + __ cmp_and_br_short(Gtemp, G0, Assembler::equal, Assembler::pn, L); + // Clear flag. + __ stbool(G0, pending_monitor_enter_addr); + // Take lock. 
+ lock_method(); + __ bind(L); + } else { +#ifdef ASSERT + if (EnableJVMCI) { + Label L; + Address pending_monitor_enter_addr(G2_thread, JavaThread::pending_monitorenter_offset()); + __ ldbool(pending_monitor_enter_addr, Gtemp); // Load the pending-monitor-enter flag + __ cmp_and_br_short(Gtemp, G0, Assembler::equal, Assembler::pn, L); + __ stop("unexpected pending monitor in deopt entry"); + __ bind(L); + } +#endif + } +#endif + { Label L; + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + __ ld_ptr(exception_addr, Gtemp); // Load pending exception. + __ br_null_short(Gtemp, Assembler::pt, L); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + if (continuation == NULL) { + __ dispatch_next(state, step); + } else { + __ jump_to_entry(continuation); + } + return entry; +} + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. The activation frame unwind code must be +// consistent with that of TemplateTable::_return(...). In the +// case of native methods, the caller's SP was not modified. +address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { + address entry = __ pc(); + Register Itos_i = Otos_i ->after_save(); + Register Itos_l = Otos_l ->after_save(); + Register Itos_l1 = Otos_l1->after_save(); + Register Itos_l2 = Otos_l2->after_save(); + switch (type) { + case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, Itos_i); break; // !0 => true; 0 => false + case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, Itos_i); break; // cannot use and3, 0xFFFF too big as immediate value! 
+ case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, Itos_i); break; + case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, Itos_i); break; + case T_LONG : + case T_INT : __ mov(O0, Itos_i); break; + case T_VOID : /* nothing to do */ break; + case T_FLOAT : assert(F0 == Ftos_f, "fix this code" ); break; + case T_DOUBLE : assert(F0 == Ftos_d, "fix this code" ); break; + case T_OBJECT : + __ ld_ptr(FP, (frame::interpreter_frame_oop_temp_offset*wordSize) + STACK_BIAS, Itos_i); + __ verify_oop(Itos_i); + break; + default : ShouldNotReachHere(); + } + __ ret(); // return from interpreter activation + __ delayed()->restore(I5_savedSP, G0, SP); // remove interpreter frame + NOT_PRODUCT(__ emit_int32(0);) // marker for disassembly + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::normal_table(vtos)); + return entry; +} + + +// +// Helpers for commoning out cases in the various types of method entries. +// + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// Lmethod: method +// ??: invocation counter +// +void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { + // Note: In tiered we increment either counters in MethodCounters* or in + // MDO depending if we're profiling or not. + const Register G3_method_counters = G3_scratch; + Label done; + + const int increment = InvocationCounter::count_increment; + Label no_mdo; + if (ProfileInterpreter) { + // If no method data exists, go to no_mdo. 
+ __ ld_ptr(Lmethod, Method::method_data_offset(), G4_scratch); + __ br_null_short(G4_scratch, Assembler::pn, no_mdo); + // Increment counter in the MDO + Address mdo_invocation_counter(G4_scratch, + in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + Address mask(G4_scratch, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, + G3_scratch, Lscratch, + Assembler::zero, overflow); + __ ba_short(done); + } + + // Increment counter in MethodCounters* + __ bind(no_mdo); + Address invocation_counter(G3_method_counters, + in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ get_method_counters(Lmethod, G3_method_counters, done); + Address mask(G3_method_counters, in_bytes(MethodCounters::invoke_mask_offset())); + __ increment_mask_and_jump(invocation_counter, increment, mask, + G4_scratch, Lscratch, + Assembler::zero, overflow); + __ bind(done); +} + +// Allocate monitor and lock method (asm interpreter) +// Lmethod - Method* +// +void TemplateInterpreterGenerator::lock_method() { + __ ld(Lmethod, in_bytes(Method::access_flags_offset()), O0); // Load access flags. 
+ +#ifdef ASSERT + { Label ok; + __ btst(JVM_ACC_SYNCHRONIZED, O0); + __ br( Assembler::notZero, false, Assembler::pt, ok); + __ delayed()->nop(); + __ stop("method doesn't need synchronization"); + __ bind(ok); + } +#endif // ASSERT + + // get synchronization object to O0 + { Label done; + __ btst(JVM_ACC_STATIC, O0); + __ br( Assembler::zero, true, Assembler::pt, done); + __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case + + // static method: lock the mirror, not the Klass* + __ load_mirror(O0, Lmethod, Lscratch); + +#ifdef ASSERT + __ tst(O0); + __ breakpoint_trap(Assembler::zero, Assembler::ptr_cc); +#endif // ASSERT + + __ bind(done); + } + + __ add_monitor_to_stack(true, noreg, noreg); // allocate monitor elem + __ st_ptr( O0, Lmonitors, BasicObjectLock::obj_offset_in_bytes()); // store object + // __ untested("lock_object from method entry"); + __ lock_object(Lmonitors, O0); +} + +// See if we've got enough room on the stack for locals plus overhead below +// JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError +// without going through the signal handler, i.e., reserved and yellow zones +// will not be made usable. The shadow zone must suffice to handle the +// overflow. +void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe_size, + Register Rscratch) { + const int page_size = os::vm_page_size(); + Label after_frame_check; + + assert_different_registers(Rframe_size, Rscratch); + + __ set(page_size, Rscratch); + __ cmp_and_br_short(Rframe_size, Rscratch, Assembler::lessEqual, Assembler::pt, after_frame_check); // frame of at most one page: skip the check + + // Get the stack overflow limit, and in debug, verify it is non-zero. 
+ __ ld_ptr(G2_thread, JavaThread::stack_overflow_limit_offset(), Rscratch); +#ifdef ASSERT + Label limit_ok; + __ br_notnull_short(Rscratch, Assembler::pn, limit_ok); + __ stop("stack overflow limit is zero in generate_stack_overflow_check"); + __ bind(limit_ok); +#endif + + // Add in the size of the frame (which is the same as subtracting it from the + // SP, which would take another register). + __ add(Rscratch, Rframe_size, Rscratch); + + // The frame is greater than one page in size, so check against + // the bottom of the stack. + __ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check); + + // The stack will overflow, throw an exception. + + // Note that SP is restored to sender's sp (in the delay slot). This + // is necessary if the sender's frame is an extended compiled frame + // (see gen_c2i_adapter()) and safer anyway in case of JSR292 + // adaptations. + + // Note also that the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + AddressLiteral stub(StubRoutines::throw_StackOverflowError_entry()); + __ jump_to(stub, Rscratch); + __ delayed()->mov(O5_savedSP, SP); + + // If you get to here, then there is enough stack space. + __ bind(after_frame_check); +} + + +// +// Generate a fixed interpreter frame. This is identical setup for interpreted +// methods and for native methods hence the shared code. 
+ + +//---------------------------------------------------------------------------------------------------- +// Stack frame layout +// +// When control flow reaches any of the entry types for the interpreter +// the following holds -> +// +// C2 Calling Conventions: +// +// The entry code below assumes that the following registers are set +// when coming in: +// G5_method: holds the Method* of the method to call +// Lesp: points to the TOS of the caller's expression stack +// after having pushed all the parameters +// +// The entry code does the following to set up an interpreter frame +// pop parameters from the caller's stack by adjusting Lesp +// set O0 to Lesp +// compute X = (max_locals - num_parameters) +// bump SP up by X to accommodate the extra locals +// compute X = max_expression_stack +// + vm_local_words +// + 16 words of register save area +// save frame doing a save sp, -X, sp growing towards lower addresses +// set Lbcp, Lmethod, LcpoolCache +// set Llocals to i0 +// set Lmonitors to FP - rounded_vm_local_words +// set Lesp to Lmonitors - BytesPerWord +// +// The frame has now been set up to do the rest of the entry code + +// Try this optimization: Most method entries could live in a +// "one size fits all" stack frame without all the dynamic size +// calculations. It might be profitable to do all this calculation +// statically and approximately for "small enough" methods. 
+ +//----------------------------------------------------------------------------------------------- +// C1 Calling conventions +// +// Upon method entry, the following registers are set up: +// +// g2 G2_thread: current thread +// g5 G5_method: method to activate +// g4 Gargs : pointer to last argument +// +// +// Stack: +// +// +---------------+ <--- sp +// | | +// : reg save area : +// | | +// +---------------+ <--- sp + 0x40 +// | | +// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) +// | | +// +---------------+ <--- sp + 0x5c +// | | +// : free : +// | | +// +---------------+ <--- Gargs +// | | +// : arguments : +// | | +// +---------------+ +// | | +// +// +// +// AFTER FRAME HAS BEEN SETUP for method interpretation the stack looks like: +// +// +---------------+ <--- sp +// | | +// : reg save area : +// | | +// +---------------+ <--- sp + 0x40 +// | | +// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) +// | | +// +---------------+ <--- sp + 0x5c +// | | +// : : +// | | <--- Lesp +// +---------------+ <--- Lmonitors (fp - 0x18) +// | VM locals | +// +---------------+ <--- fp +// | | +// : reg save area : +// | | +// +---------------+ <--- fp + 0x40 +// | | +// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) +// | | +// +---------------+ <--- fp + 0x5c +// | | +// : free : +// | | +// +---------------+ +// | | +// : nonarg locals : +// | | +// +---------------+ +// | | +// : arguments : +// | | <--- Llocals +// +---------------+ <--- Gargs +// | | + +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // + // + // The entry code sets up a new interpreter frame in 4 steps: + // + // 1) Increase caller's SP for the extra local space needed: + // (check for overflow) + // Efficient implementation of xload/xstore bytecodes requires + // that arguments and non-argument locals are in a contiguously + // 
addressable memory block => non-argument locals must be + // allocated in the caller's frame. + // + // 2) Create a new stack frame and register window: + // The new stack frame must provide space for the standard + // register save area, the maximum java expression stack size, + // the monitor slots (0 slots initially), and some frame local + // scratch locations. + // + // 3) The following interpreter activation registers must be set up: + // Lesp : expression stack pointer + // Lbcp : bytecode pointer + // Lmethod : method + // Llocals : locals pointer + // Lmonitors : monitor pointer + // LcpoolCache: constant pool cache + // + // 4) Initialize the non-argument locals if necessary: + // Non-argument locals may need to be initialized to NULL + // for GC to work. If the oop-map information is accurate + // (in the absence of the JSR problem), no initialization + // is necessary. + // + // (gri - 2/25/2000) + + + int rounded_vm_local_words = align_up((int)frame::interpreter_frame_vm_local_words, WordsPerLong ); + + const int extra_space = + rounded_vm_local_words + // frame local scratch space + Method::extra_stack_entries() + // extra stack for jsr 292 + frame::memory_parameter_word_sp_offset + // register save area + (native_call ? frame::interpreter_frame_extra_outgoing_argument_words : 0); + + const Register Glocals_size = G3; + const Register RconstMethod = Glocals_size; // same register as Glocals_size (G3): only valid until Glocals_size is loaded + const Register Otmp1 = O3; + const Register Otmp2 = O4; + // Lscratch can't be used as a temporary because the call_stub uses + // it to assert that the stack frame was set up correctly. 
+ const Address constMethod (G5_method, Method::const_offset()); + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); + + __ ld_ptr( constMethod, RconstMethod ); + __ lduh( size_of_parameters, Glocals_size); + + // Gargs points to first local + BytesPerWord + // Set the saved SP after the register window save + // + assert_different_registers(Gargs, Glocals_size, Gframe_size, O5_savedSP); + __ sll(Glocals_size, Interpreter::logStackElementSize, Otmp1); + __ add(Gargs, Otmp1, Gargs); + + if (native_call) { + __ calc_mem_param_words( Glocals_size, Gframe_size ); + __ add( Gframe_size, extra_space, Gframe_size); + __ round_to( Gframe_size, WordsPerLong ); + __ sll( Gframe_size, LogBytesPerWord, Gframe_size ); + + // Native calls don't need the stack size check since they have no + // expression stack and the arguments are already on the stack and + // we only add a handful of words to the stack. + } else { + + // + // Compute number of locals in method apart from incoming parameters + // + const Address size_of_locals(Otmp1, ConstMethod::size_of_locals_offset()); + __ ld_ptr(constMethod, Otmp1); + __ lduh(size_of_locals, Otmp1); + __ sub(Otmp1, Glocals_size, Glocals_size); // size_of_locals - size_of_parameters + __ round_to(Glocals_size, WordsPerLong); + __ sll(Glocals_size, Interpreter::logStackElementSize, Glocals_size); + + // See if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining. 
+ // Frame_size = (max_stack + extra_space) * BytesPerWord; + __ ld_ptr(constMethod, Gframe_size); + __ lduh(Gframe_size, in_bytes(ConstMethod::max_stack_offset()), Gframe_size); + __ add(Gframe_size, extra_space, Gframe_size); + __ round_to(Gframe_size, WordsPerLong); + __ sll(Gframe_size, Interpreter::logStackElementSize, Gframe_size); + + // Add in java locals size for stack overflow check only + __ add(Gframe_size, Glocals_size, Gframe_size); + + const Register Otmp2 = O4; // NOTE(review): shadows the identical Otmp2 declared at function scope + assert_different_registers(Otmp1, Otmp2, O5_savedSP); + generate_stack_overflow_check(Gframe_size, Otmp1); + + __ sub(Gframe_size, Glocals_size, Gframe_size); // undo the locals adjustment made for the overflow check + + // + // bump SP to accommodate the extra locals + // + __ sub(SP, Glocals_size, SP); + } + + // + // now set up a stack frame with the size computed above + // + __ neg( Gframe_size ); + __ save( SP, Gframe_size, SP ); + + // + // now set up all the local cache registers + // + // NOTE: At this point, Lbyte_code/Lscratch has been modified. Note + // that all present references to Lbyte_code initialize the register + // immediately before use + if (native_call) { + __ mov(G0, Lbcp); + } else { + __ ld_ptr(G5_method, Method::const_offset(), Lbcp); + __ add(Lbcp, in_bytes(ConstMethod::codes_offset()), Lbcp); + } + __ mov( G5_method, Lmethod); // set Lmethod + // Get mirror and store it in the frame as GC root for this Method* + Register mirror = LcpoolCache; // LcpoolCache serves as a temporary here; it is loaded with its real value just below + __ load_mirror(mirror, Lmethod, Lscratch); + __ st_ptr(mirror, FP, (frame::interpreter_frame_mirror_offset * wordSize) + STACK_BIAS); + __ get_constant_pool_cache(LcpoolCache); // set LcpoolCache + __ sub(FP, rounded_vm_local_words * BytesPerWord, Lmonitors ); // set Lmonitors + __ add(Lmonitors, STACK_BIAS, Lmonitors); // Account for 64 bit stack bias + __ sub(Lmonitors, BytesPerWord, Lesp); // set Lesp + + // set up interpreter activation registers + __ sub(Gargs, BytesPerWord, Llocals); // set Llocals + + if (ProfileInterpreter) { + __ set_method_data_pointer(); + } + +} + 
+// Method entry for java.lang.ref.Reference.get.
+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code performing an ON_WEAK_OOP_REF load,
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * An intrinsic is always executed, where an ON_WEAK_OOP_REF load is performed.
+  // * We may jump to the slow path iff the receiver is null. If the
+  //   Reference object is null then we no longer perform an ON_WEAK_OOP_REF load
+  //   Thus we can use the regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_entry.
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset();
+
+  Label slow_path;
+
+  // In the G1 code we don't check if we need to reach a safepoint. We
+  // continue and the thread will safepoint at the next bytecode dispatch.
+
+  // Check if local 0 != NULL
+  // If the receiver is null then it is OK to jump to the slow path.
+  __ ld_ptr(Gargs, G0, Otos_i ); // get local 0
+  // check if local 0 == NULL and go to the slow path
+  __ cmp_and_brx_short(Otos_i, 0, Assembler::equal, Assembler::pn, slow_path);
+
+  __ load_heap_oop(Otos_i, referent_offset, Otos_i, G3_scratch, ON_WEAK_OOP_REF);
+
+  // _areturn
+  __ retl(); // return from leaf routine
+  __ delayed()->mov(O5_savedSP, SP);
+
+  // Generate regular method entry
+  __ bind(slow_path);
+  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+  return entry;
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
+
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    Label L_slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    __ safepoint_poll(L_slow_path, false, G2_thread, O2);
+    __ delayed()->nop();
+
+    // Load parameters
+    const Register crc = O0; // initial crc
+    const Register val = O1; // byte to update with
+    const Register table = O2; // address of 256-entry lookup table
+
+    __ ldub(Gargs, 3, val); // 'b' is the last argument (offset 0); byte 3 is presumably its low-order byte - TODO confirm
+    __ lduw(Gargs, 8, crc); // 'crc' is the first argument, at offset 8
+
+    __ set(ExternalAddress(StubRoutines::crc_table_addr()), table);
+
+    __ not1(crc); // ~crc
+    __ clruwu(crc);
+    __ update_byte_crc32(crc, val, table);
+    __ not1(crc); // ~crc
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    // generate a vanilla native entry as the slow path
+    __ bind(L_slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
+    return entry;
+  }
+  return NULL;
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    Label L_slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+
+    __ safepoint_poll(L_slow_path, false, G2_thread, O2);
+    __ delayed()->nop();
+
+    // Load parameters from the stack
+    const Register crc = O0; // initial crc
+    const Register buf = O1; // source java byte array address
+    const Register len = O2; // len
+    const Register offset = O3; // offset
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+      __ lduw(Gargs, 0, len);
+      __ lduw(Gargs, 8, offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 32, crc); // presumably the long 'buf' takes two stack elements (16 and 24) - TODO confirm
+      __ add(buf, offset, buf);
+    } else {
+      __ lduw(Gargs, 0, len);
+      __ lduw(Gargs, 8, offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 24, crc);
+      __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
+      __ add(buf, offset, buf);
+    }
+
+    // Call the crc32 kernel
+    __ MacroAssembler::save_thread(L7_thread_cache);
+    __ kernel_crc32(crc, buf, len, O3); // O3 was 'offset', already folded into buf above
+    __ MacroAssembler::restore_thread(L7_thread_cache);
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    // generate a vanilla native entry as the slow path
+    __ bind(L_slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
+    return entry;
+  }
+  return NULL;
+}
+
+/**
+ * Method entry for intrinsic-candidate (non-native) methods:
+ *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
+ *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
+ * Unlike CRC32, CRC32C does not have any methods marked as native
+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses
+ */
+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (UseCRC32CIntrinsics) {
+    address entry = __ pc();
+
+    // Load parameters from the stack
+    const Register crc = O0; // initial crc
+    const Register buf = O1; // source java byte array address
+    const Register offset = O2; // offset (same register as len below)
+    const Register end = O3; // index of last element to process (same register as table below)
+    const Register len = O2; // len argument to the kernel
+    const Register table = O3; // crc32c lookup table address
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
+      __ lduw(Gargs, 0, end);
+      __ lduw(Gargs, 8, offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 32, crc);
+      __ add(buf, offset, buf);
+      __ sub(end, offset, len); // len = end - offset; overwrites offset (both O2)
+    } else {
+      __ lduw(Gargs, 0, end);
+      __ lduw(Gargs, 8, offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 24, crc);
+      __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
+      __ add(buf, offset, buf);
+      __ sub(end, offset, len); // len = end - offset; overwrites offset (both O2)
+    }
+
+    // Call the crc32c kernel
+    __ MacroAssembler::save_thread(L7_thread_cache);
+    __ kernel_crc32c(crc, buf, len, table); // NOTE(review): table (O3) is not loaded here and still holds 'end' - presumably set up by kernel_crc32c; confirm
+    __ MacroAssembler::restore_thread(L7_thread_cache);
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    return entry;
+  }
+  return NULL;
+}
+
+/* Math routines only partially supported.
+ *
+ * Providing support for fma (float/double) only.
+ */
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind)
+{
+  if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
+
+  address entry = __ pc();
+
+  switch (kind) {
+    case Interpreter::java_lang_math_fmaF:
+      if (UseFMA) {
+        // float .fma(float a, float b, float c)
+        const FloatRegister ra = F1;
+        const FloatRegister rb = F2;
+        const FloatRegister rc = F3;
+        const FloatRegister rd = F0; // Result.
+
+        __ ldf(FloatRegisterImpl::S, Gargs, 0, rc); // c, the last argument, is at Gargs + 0
+        __ ldf(FloatRegisterImpl::S, Gargs, 8, rb); // b
+        __ ldf(FloatRegisterImpl::S, Gargs, 16, ra); // a
+
+        __ fmadd(FloatRegisterImpl::S, ra, rb, rc, rd);
+        __ retl(); // Result in F0 (rd).
+        __ delayed()->mov(O5_savedSP, SP);
+
+        return entry;
+      }
+      break;
+    case Interpreter::java_lang_math_fmaD:
+      if (UseFMA) {
+        // double .fma(double a, double b, double c)
+        const FloatRegister ra = F2; // D1
+        const FloatRegister rb = F4; // D2
+        const FloatRegister rc = F6; // D3
+        const FloatRegister rd = F0; // D0 Result.
+
+        __ ldf(FloatRegisterImpl::D, Gargs, 0, rc); // c; presumably each double takes two stack elements, hence the 16-byte stride - TODO confirm
+        __ ldf(FloatRegisterImpl::D, Gargs, 16, rb); // b
+        __ ldf(FloatRegisterImpl::D, Gargs, 32, ra); // a
+
+        __ fmadd(FloatRegisterImpl::D, ra, rb, rc, rd);
+        __ retl(); // Result in D0 (rd).
+        __ delayed()->mov(O5_savedSP, SP);
+
+        return entry;
+      }
+      break;
+    default:
+      break;
+  }
+  return NULL;
+}
+
+// TODO: rather than touching all pages, check against stack_overflow_limit and bang yellow page to
+// generate exception
+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
+  // Quick & dirty stack overflow checking: bang the stack & handle trap.
+  // Note that we do the banging after the frame is setup, since the exception
+  // handling code expects to find a valid interpreter frame on the stack.
+  // Doing the banging earlier fails if the caller frame is not an interpreter
+  // frame.
+  // (Also, the exception throwing code expects to unlock any synchronized
+  // method receiver, so do the banging after locking the receiver.)
+
+  // Bang each page in the shadow zone. We can't assume it's been done for
+  // an interpreter frame with greater than a page of locals, so each page
+  // needs to be checked. Only true for non-native.
+  const int page_size = os::vm_page_size();
+  const int n_shadow_pages = ((int)StackOverflow::stack_shadow_zone_size()) / page_size;
+  const int start_page = native_call ? n_shadow_pages : 1; // native: the loop below bangs only the last shadow page
+  for (int pages = start_page; pages <= n_shadow_pages; pages++) {
+    __ bang_stack_with_offset(pages*page_size);
+  }
+}
+
+//
+// Interpreter stub for calling a native method.
(asm interpreter)
+// This sets up a somewhat different looking stack for calling the native method
+// than the typical interpreter frame setup.
+//
+
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
+  address entry = __ pc();
+
+  // the following temporary registers are used during frame creation
+  const Register Gtmp1 = G3_scratch ;
+  const Register Gtmp2 = G1_scratch;
+  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+  // make sure registers are different!
+  assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2);
+
+  const Address Laccess_flags(Lmethod, Method::access_flags_offset());
+
+  const Register Glocals_size = G3;
+  assert_different_registers(Glocals_size, G4_scratch, Gframe_size);
+
+  // make sure method is native & not abstract
+  // rethink these assertions - they can be simplified and shared (gri 2/25/2000)
+#ifdef ASSERT
+  __ ld(G5_method, Method::access_flags_offset(), Gtmp1);
+  { Label L;
+    __ btst(JVM_ACC_NATIVE, Gtmp1);
+    __ br(Assembler::notZero, false, Assembler::pt, L);
+    __ delayed()->nop();
+    __ stop("tried to execute non-native method as native");
+    __ bind(L);
+  }
+  { Label L;
+    __ btst(JVM_ACC_ABSTRACT, Gtmp1);
+    __ br(Assembler::zero, false, Assembler::pt, L);
+    __ delayed()->nop();
+    __ stop("tried to execute abstract method as non-abstract");
+    __ bind(L);
+  }
+#endif // ASSERT
+
+  // generate the code to allocate the interpreter stack frame
+  generate_fixed_frame(true); // true selects the native_call frame variant
+
+  //
+  // No locals to initialize for native method
+  //
+
+  // this slot will be set later, we initialize it to null here just in
+  // case we get a GC before the actual value is stored later
+  __ st_ptr(G0, FP, (frame::interpreter_frame_oop_temp_offset * wordSize) + STACK_BIAS);
+
+  const Address do_not_unlock_if_synchronized(G2_thread,
+    JavaThread::do_not_unlock_if_synchronized_offset());
+  // Since at this point in the method invocation the exception handler
+  // would try to exit the monitor of synchronized methods which hasn't
+  // been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. If any exception was thrown by
+  // runtime, exception handling i.e. unlock_if_synchronized_method will
+  // check this thread local flag.
+  // This flag has two effects, one is to force an unwind in the topmost
+  // interpreter frame and not perform an unlock while doing so.
+
+  __ movbool(true, G3_scratch);
+  __ stbool(G3_scratch, do_not_unlock_if_synchronized);
+
+  // increment invocation counter and check for overflow
+  //
+  // Note: checking for negative value instead of overflow
+  //       so we have a 'sticky' overflow test (may be of
+  //       importance as soon as we have true MT/MP)
+  Label invocation_counter_overflow;
+  Label Lcontinue;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow);
+
+  }
+  __ bind(Lcontinue);
+
+  bang_stack_shadow_pages(true);
+
+  // reset the _do_not_unlock_if_synchronized flag
+  __ stbool(G0, do_not_unlock_if_synchronized);
+
+  // check for synchronized methods
+  // Must happen AFTER invocation_counter check and stack overflow check,
+  // so method is not locked if overflows.
+
+  if (synchronized) {
+    lock_method();
+  } else {
+#ifdef ASSERT
+    { Label ok;
+      __ ld(Laccess_flags, O0);
+      __ btst(JVM_ACC_SYNCHRONIZED, O0);
+      __ br( Assembler::zero, false, Assembler::pt, ok);
+      __ delayed()->nop();
+      __ stop("method needs synchronization");
+      __ bind(ok);
+    }
+#endif // ASSERT
+  }
+
+
+  // start execution
+  __ verify_thread();
+
+  // JVMTI support
+  __ notify_method_entry();
+
+  // native call
+
+  // (note that O0 is never an oop--at most it is a handle)
+  // It is important not to smash any handles created by this call,
+  // until any oop handle in O0 is dereferenced.
+
+  // (note that the space for outgoing params is preallocated)
+
+  // get signature handler
+  { Label L;
+    Address signature_handler(Lmethod, Method::signature_handler_offset());
+    __ ld_ptr(signature_handler, G3_scratch);
+    __ br_notnull_short(G3_scratch, Assembler::pt, L);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), Lmethod);
+    __ ld_ptr(signature_handler, G3_scratch);
+    __ bind(L);
+  }
+
+  // Push a new frame so that the args will really be stored in it.
+  // Copy a few locals across so the new frame has the variables
+  // we need but these values will be dead at the jni call and
+  // therefore not gc volatile like the values in the current
+  // frame (Lmethod in particular)
+
+  // Flush the method pointer to the register save area
+  __ st_ptr(Lmethod, SP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS);
+  __ mov(Llocals, O1);
+
+  // calculate where the mirror handle body is allocated in the interpreter frame:
+  __ add(FP, (frame::interpreter_frame_oop_temp_offset * wordSize) + STACK_BIAS, O2);
+
+  // Calculate current frame size
+  __ sub(SP, FP, O3); // Calculate negative of current frame size
+  __ save(SP, O3, SP); // Allocate an identical sized frame
+
+  // Note I7 has leftover trash. Slow signature handler will fill it in
+  // should we get there. Normal jni call will set reasonable last_Java_pc
+  // below (and fix I7 so the stack trace doesn't have a meaningless frame
+  // in it).
+
+  // Load interpreter frame's Lmethod into same register here
+
+  __ ld_ptr(FP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS, Lmethod);
+
+  __ mov(I1, Llocals);
+  __ mov(I2, Lscratch2); // save the address of the mirror
+
+
+  // ONLY Lmethod and Llocals are valid here!
+
+  // call signature handler, It will move the arg properly since Llocals in current frame
+  // matches that in outer frame
+
+  __ callr(G3_scratch, 0);
+  __ delayed()->nop();
+
+  // Result handler is in Lscratch
+
+  // Reload interpreter frame's Lmethod since slow signature handler may block
+  __ ld_ptr(FP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS, Lmethod);
+
+  { Label not_static;
+
+    __ ld(Laccess_flags, O0);
+    __ btst(JVM_ACC_STATIC, O0);
+    __ br( Assembler::zero, false, Assembler::pt, not_static);
+    // get native function entry point(O0 is a good temp until the very end)
+    __ delayed()->ld_ptr(Lmethod, in_bytes(Method::native_function_offset()), O0);
+    // for static methods insert the mirror argument
+    __ load_mirror(O1, Lmethod, G3_scratch);
+#ifdef ASSERT
+    if (!PrintSignatureHandlers) // do not dirty the output with this
+    { Label L;
+      __ br_notnull_short(O1, Assembler::pt, L);
+      __ stop("mirror is missing");
+      __ bind(L);
+    }
+#endif // ASSERT
+    __ st_ptr(O1, Lscratch2, 0);
+    __ mov(Lscratch2, O1);
+    __ bind(not_static);
+  }
+
+  // At this point, arguments have been copied off of stack into
+  // their JNI positions, which are O1..O5 and SP[68..].
+  // Oops are boxed in-place on the stack, with handles copied to arguments.
+  // The result handler is in Lscratch. O0 will shortly hold the JNIEnv*.
+
+#ifdef ASSERT
+  { Label L;
+    __ br_notnull_short(O0, Assembler::pt, L);
+    __ stop("native entry point is missing");
+    __ bind(L);
+  }
+#endif // ASSERT
+
+  //
+  // setup the frame anchor
+  //
+  // The scavenge function only needs to know that the PC of this frame is
+  // in the interpreter method entry code, it doesn't need to know the exact
+  // PC and hence we can use O7 which points to the return address from the
+  // previous call in the code stream (signature handler function)
+  //
+  // The other trick is we set last_Java_sp to FP instead of the usual SP because
+  // we have pushed the extra frame in order to protect the volatile register(s)
+  // in that frame when we return from the jni call
+  //
+
+  __ set_last_Java_frame(FP, O7);
+  __ mov(O7, I7); // make dummy interpreter frame look like one above,
+                  // not meaningless information that'll confuse me.
+
+  // flush the windows now. We don't care about the current (protection) frame
+  // only the outer frames
+
+  __ flushw();
+
+  // mark windows as flushed
+  Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
+  __ set(JavaFrameAnchor::flushed, G3_scratch);
+  __ st(G3_scratch, flags);
+
+  // Transition from _thread_in_Java to _thread_in_native. We are already safepoint ready.
+
+  Address thread_state(G2_thread, JavaThread::thread_state_offset());
+#ifdef ASSERT
+  { Label L;
+    __ ld(thread_state, G3_scratch);
+    __ cmp_and_br_short(G3_scratch, _thread_in_Java, Assembler::equal, Assembler::pt, L);
+    __ stop("Wrong thread state in native stub");
+    __ bind(L);
+  }
+#endif // ASSERT
+  __ set(_thread_in_native, G3_scratch);
+  __ st(G3_scratch, thread_state);
+
+  // Call the jni method, using the delay slot to set the JNIEnv* argument.
+  __ save_thread(L7_thread_cache); // save Gthread
+  __ callr(O0, 0);
+  __ delayed()->
+     add(L7_thread_cache, in_bytes(JavaThread::jni_environment_offset()), O0);
+
+  // Back from jni method Lmethod in this frame is DEAD, DEAD, DEAD
+
+  __ restore_thread(L7_thread_cache); // restore G2_thread
+  __ reinit_heapbase();
+
+  // must we block?
+
+  // Block, if necessary, before resuming in _thread_in_Java state.
+  // In order for GC to work, don't clear the last_Java_sp until after blocking.
+  { Label no_block;
+
+    // Switch thread to "native transition" state before reading the synchronization state.
+    // This additional state is necessary because reading and testing the synchronization
+    // state is not atomic w.r.t. GC, as this scenario demonstrates:
+    //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+    //     VM thread changes sync state to synchronizing and suspends threads for GC.
+    //     Thread A is resumed to finish this native method, but doesn't block here since it
+    //     didn't see any synchronization in progress, and escapes.
+    __ set(_thread_in_native_trans, G3_scratch);
+    __ st(G3_scratch, thread_state);
+
+    // Force this write out before the read below
+    __ membar(Assembler::StoreLoad);
+
+    Label L;
+    __ safepoint_poll(L, false, G2_thread, G3_scratch);
+    __ delayed()->ld(G2_thread, JavaThread::suspend_flags_offset(), G3_scratch);
+    __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block);
+    __ bind(L);
+
+    // Block. Save any potential method result value before the operation and
+    // use a leaf call to leave the last_Java_frame setup undisturbed.
+    save_native_result();
+    __ call_VM_leaf(L7_thread_cache,
+                    CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
+                    G2_thread);
+
+    // Restore any method result value
+    restore_native_result();
+    __ bind(no_block);
+  }
+
+  // Clear the frame anchor now
+
+  __ reset_last_Java_frame();
+
+  // Move the result handler address
+  __ mov(Lscratch, G3_scratch);
+  // return possible result to the outer frame
+  __ restore(O0, G0, O0);
+
+  // Move result handler to expected register
+  __ mov(G3_scratch, Lscratch);
+
+  // Back in normal (native) interpreter frame. State is thread_in_native_trans
+  // switch to thread_in_Java.
+
+  __ set(_thread_in_Java, G3_scratch);
+  __ st(G3_scratch, thread_state);
+
+  if (CheckJNICalls) {
+    // clear_pending_jni_exception_check
+    __ st_ptr(G0, G2_thread, JavaThread::pending_jni_exception_check_fn_offset());
+  }
+
+  // reset handle block
+  __ ld_ptr(G2_thread, JavaThread::active_handles_offset(), G3_scratch);
+  __ st(G0, G3_scratch, JNIHandleBlock::top_offset_in_bytes());
+
+  // If we have an oop result store it where it will be safe for any further gc
+  // until we return now that we've released the handle it might be protected by
+
+  { Label no_oop;
+
+    __ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch);
+    __ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop);
+    __ resolve_jobject(O0, G3_scratch);
+    // Store it where gc will look for it and result handler expects it.
+    __ st_ptr(O0, FP, (frame::interpreter_frame_oop_temp_offset*wordSize) + STACK_BIAS);
+
+    __ bind(no_oop);
+  }
+
+
+  // handle exceptions (exception handling will handle unlocking!)
+  { Label L;
+    Address exception_addr(G2_thread, Thread::pending_exception_offset());
+    __ ld_ptr(exception_addr, Gtemp);
+    __ br_null_short(Gtemp, Assembler::pt, L);
+    // Note: This could be handled more efficiently since we know that the native
+    //       method doesn't have an exception handler. We could directly return
+    //       to the exception handler for the caller.
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
+    __ should_not_reach_here();
+    __ bind(L);
+  }
+
+  // JVMTI support (preserves thread register)
+  __ notify_method_exit(true, ilgl, InterpreterMacroAssembler::NotifyJVMTI);
+
+  if (synchronized) {
+    // save and restore any potential method result value around the unlocking operation
+    save_native_result();
+
+    __ add( __ top_most_monitor(), O1);
+    __ unlock_object(O1);
+
+    restore_native_result();
+  }
+
+  // dispose of return address and remove activation
+#ifdef ASSERT
+  { Label ok;
+    __ cmp_and_brx_short(I5_savedSP, FP, Assembler::greaterEqualUnsigned, Assembler::pt, ok);
+    __ stop("bad I5_savedSP value");
+    __ should_not_reach_here();
+    __ bind(ok);
+  }
+#endif // ASSERT
+  __ jmp(Lscratch, 0); // Lscratch holds the result handler address (moved there after the restore above)
+  __ delayed()->nop();
+
+  if (inc_counter) {
+    // handle invocation counter overflow
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(Lcontinue);
+  }
+
+  return entry;
+}
+
+
+// Generic method entry to (asm) interpreter
+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
+  address entry = __ pc();
+
+  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+  // the following temporary registers are used during frame creation
+  const Register Gtmp1 = G3_scratch ;
+  const Register Gtmp2 = G1_scratch;
+
+  // make sure registers are different!
+  assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2);
+
+  const Address constMethod (G5_method, Method::const_offset());
+  // Seems like G5_method is live at the point this is used. So we could make this look consistent
+  // and use in the asserts.
+  const Address access_flags (Lmethod, Method::access_flags_offset());
+
+  const Register Glocals_size = G3;
+  assert_different_registers(Glocals_size, G4_scratch, Gframe_size);
+
+  // make sure method is not native & not abstract
+  // rethink these assertions - they can be simplified and shared (gri 2/25/2000)
+#ifdef ASSERT
+  __ ld(G5_method, Method::access_flags_offset(), Gtmp1);
+  { Label L;
+    __ btst(JVM_ACC_NATIVE, Gtmp1);
+    __ br(Assembler::zero, false, Assembler::pt, L);
+    __ delayed()->nop();
+    __ stop("tried to execute native method as non-native");
+    __ bind(L);
+  }
+  { Label L;
+    __ btst(JVM_ACC_ABSTRACT, Gtmp1);
+    __ br(Assembler::zero, false, Assembler::pt, L);
+    __ delayed()->nop();
+    __ stop("tried to execute abstract method as non-abstract");
+    __ bind(L);
+  }
+#endif // ASSERT
+
+  // generate the code to allocate the interpreter stack frame
+
+  generate_fixed_frame(false); // false selects the non-native frame variant
+
+  //
+  // Code to initialize the extra (i.e. non-parm) locals
+  //
+  Register init_value = noreg; // will be G0 if we must clear locals
+  // The way the code was setup before zerolocals was always true for vanilla java entries.
+  // It could only be false for the specialized entries like accessor or empty which have
+  // no extra locals so the testing was a waste of time and the extra locals were always
+  // initialized. We removed this extra complication to already over complicated code.
+
+  init_value = G0;
+  Label clear_loop;
+
+  const Register RconstMethod = O1;
+  const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals (RconstMethod, ConstMethod::size_of_locals_offset());
+
+  // NOTE: If you change the frame layout, this code will need to
+  // be updated!
+  __ ld_ptr( constMethod, RconstMethod );
+  __ lduh( size_of_locals, O2 );
+  __ lduh( size_of_parameters, O1 ); // overwrites RconstMethod (same register, O1)
+  __ sll( O2, Interpreter::logStackElementSize, O2);
+  __ sll( O1, Interpreter::logStackElementSize, O1 );
+  __ sub( Llocals, O2, O2 ); // O2 = Llocals - scaled size_of_locals
+  __ sub( Llocals, O1, O1 ); // O1 = Llocals - scaled size_of_parameters
+
+  __ bind( clear_loop );
+  __ inc( O2, wordSize );
+
+  __ cmp( O2, O1 );
+  __ brx( Assembler::lessEqualUnsigned, true, Assembler::pt, clear_loop ); // annulled: the delay-slot store executes only when the branch is taken
+  __ delayed()->st_ptr( init_value, O2, 0 );
+
+  const Address do_not_unlock_if_synchronized(G2_thread,
+    JavaThread::do_not_unlock_if_synchronized_offset());
+  // Since at this point in the method invocation the exception handler
+  // would try to exit the monitor of synchronized methods which hasn't
+  // been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. If any exception was thrown by
+  // runtime, exception handling i.e. unlock_if_synchronized_method will
+  // check this thread local flag.
+  __ movbool(true, G3_scratch);
+  __ stbool(G3_scratch, do_not_unlock_if_synchronized);
+
+  __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch);
+  // increment invocation counter and check for overflow
+  //
+  // Note: checking for negative value instead of overflow
+  //       so we have a 'sticky' overflow test (may be of
+  //       importance as soon as we have true MT/MP)
+  Label invocation_counter_overflow;
+  Label Lcontinue;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow);
+  }
+  __ bind(Lcontinue);
+
+  bang_stack_shadow_pages(false);
+
+  // reset the _do_not_unlock_if_synchronized flag
+  __ stbool(G0, do_not_unlock_if_synchronized);
+
+  // check for synchronized methods
+  // Must happen AFTER invocation_counter check and stack overflow check,
+  // so method is not locked if overflows.
+
+  if (synchronized) {
+    lock_method();
+  } else {
+#ifdef ASSERT
+    { Label ok;
+      __ ld(access_flags, O0);
+      __ btst(JVM_ACC_SYNCHRONIZED, O0);
+      __ br( Assembler::zero, false, Assembler::pt, ok);
+      __ delayed()->nop();
+      __ stop("method needs synchronization");
+      __ bind(ok);
+    }
+#endif // ASSERT
+  }
+
+  // start execution
+
+  __ verify_thread();
+
+  // jvmti support
+  __ notify_method_entry();
+
+  // start executing instructions
+  __ dispatch_next(vtos);
+
+
+  if (inc_counter) {
+    // handle invocation counter overflow
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(Lcontinue);
+  }
+
+  return entry;
+}
+
+//----------------------------------------------------------------------------------------------------
+// Exceptions
+void TemplateInterpreterGenerator::generate_throw_exception() {
+
+  // Entry point in previous activation (i.e., if the caller was interpreted)
+  Interpreter::_rethrow_exception_entry = __ pc();
+  // O0: exception
+
+  // entry point for exceptions thrown within interpreter code
+  Interpreter::_throw_exception_entry = __ pc();
+  __ verify_thread();
+  // expression stack is undefined here
+  // O0: exception, i.e. Oexception
+  // Lbcp: exception bcp
+  __ verify_oop(Oexception);
+
+
+  // expression stack must be empty before entering the VM in case of an exception
+  __ empty_expression_stack();
+  // find exception handler address and preserve exception oop
+  // call C routine to find handler and jump to it
+  __ call_VM(O1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), Oexception);
+  __ push_ptr(O1); // push exception for exception handler bytecodes
+
+  __ JMP(O0, 0); // jump to exception handler (may be remove activation entry!)
+  __ delayed()->nop();
+
+
+  // if the exception is not handled in the current frame
+  // the frame is removed and the exception is rethrown
+  // (i.e.
exception continuation is _rethrow_exception) + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). + + // in current activation + // tos: exception + // Lbcp: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + Address popframe_condition_addr(G2_thread, JavaThread::popframe_condition_offset()); + // Set the popframe_processing bit in popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. + + __ ld(popframe_condition_addr, G3_scratch); + __ or3(G3_scratch, JavaThread::popframe_processing_bit, G3_scratch); + __ stw(G3_scratch, popframe_condition_addr); + + // Empty the expression stack, as in normal exception handling + __ empty_expression_stack(); + __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, /* install_monitor_exception */ false); + + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. 
+ Label caller_not_deoptimized; + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), I7); + __ br_notnull_short(O0, Assembler::pt, caller_not_deoptimized); + + const Register Gtmp1 = G3_scratch; + const Register Gtmp2 = G1_scratch; + const Register RconstMethod = Gtmp1; + const Address constMethod(Lmethod, Method::const_offset()); + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); + + // Compute size of arguments for saving when returning to deoptimized caller + __ ld_ptr(constMethod, RconstMethod); + __ lduh(size_of_parameters, Gtmp1); + __ sll(Gtmp1, Interpreter::logStackElementSize, Gtmp1); + __ sub(Llocals, Gtmp1, Gtmp2); + __ add(Gtmp2, wordSize, Gtmp2); + // Save these arguments + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), G2_thread, Gtmp1, Gtmp2); + // Inform deoptimization that it is responsible for restoring these arguments + __ set(JavaThread::popframe_force_deopt_reexecution_bit, Gtmp1); + Address popframe_condition_addr(G2_thread, JavaThread::popframe_condition_offset()); + __ st(Gtmp1, popframe_condition_addr); + + // Return from the current method + // The caller's SP was adjusted upon method entry to accommodate + // the callee's non-argument locals. Undo that adjustment. + __ ret(); + __ delayed()->restore(I5_savedSP, G0, SP); + + __ bind(caller_not_deoptimized); + } + + // Clear the popframe condition flag + __ stw(G0 /* popframe_inactive */, popframe_condition_addr); + + // Get out of the current method (how this is done depends on the particular compiler calling + // convention that the interpreter currently follows) + // The caller's SP was adjusted upon method entry to accommodate + // the callee's non-argument locals. Undo that adjustment. + __ restore(I5_savedSP, G0, SP); + // The method data pointer was incremented already during + // call profiling. 
We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } + +#if INCLUDE_JVMTI + { Label L_done; + + __ ldub(Address(Lbcp, 0), G1_scratch); // Load current bytecode + __ cmp_and_br_short(G1_scratch, Bytecodes::_invokestatic, Assembler::notEqual, Assembler::pn, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ call_VM(G1_scratch, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), I0, Lmethod, Lbcp); + + __ br_null(G1_scratch, false, Assembler::pn, L_done); + __ delayed()->nop(); + + __ st_ptr(G1_scratch, Lesp, wordSize); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + // Resume bytecode interpretation at the current bcp + __ dispatch_next(vtos); + // end of JVMTI PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence (remove activation calls the vm, but oopmaps are not correct here) + __ pop_ptr(Oexception); // get exception + + // Intel has the following comment: + //// remove the activation (without doing throws on illegalMonitorExceptions) + // They remove the activation without checking for bad monitor state. + // %%% We should make sure this is the right semantics before implementing. + + __ set_vm_result(Oexception); + __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false); + + __ notify_method_exit(false, vtos, InterpreterMacroAssembler::SkipNotifyJVMTI); + + __ get_vm_result(Oexception); + __ verify_oop(Oexception); + + const int return_reg_adjustment = frame::pc_return_offset; + Address issuing_pc_addr(I7, return_reg_adjustment); + + // We are done with this activation frame; find out where to go next. 
+ // The continuation point will be an exception handler, which expects + // the following registers set up: + // + // Oexception: exception + // Oissuing_pc: the local call that threw exception + // Other On: garbage + // In/Ln: the contents of the caller's register window + // + // We do the required restore at the last possible moment, because we + // need to preserve some state across a runtime call. + // (Remember that the caller activation is unknown--it might not be + // interpreted, so things like Lscratch are useless in the caller.) + + // Although the Intel version uses call_C, we can use the more + // compact call_VM. (The only real difference on SPARC is a + // harmlessly ignored [re]set_last_Java_frame, compared with + // the Intel code which lacks this.) + __ mov(Oexception, Oexception ->after_save()); // get exception in I0 so it will be on O0 after restore + __ add(issuing_pc_addr, Oissuing_pc->after_save()); // likewise set I1 to a value local to the caller + __ super_call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + G2_thread, Oissuing_pc->after_save()); + + // The caller's SP was adjusted upon method entry to accommodate + // the callee's non-argument locals. Undo that adjustment. 
+ __ JMP(O0, 0); // return exception handler in caller + __ delayed()->restore(I5_savedSP, G0, SP); + + // (same old exception object is already in Oexception; see above) + // Note that an "issuing PC" is actually the next PC after the call +} + + +// +// JVMTI ForceEarlyReturn support +// + +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + + __ empty_expression_stack(); + __ load_earlyret_value(state); + + __ ld_ptr(G2_thread, JavaThread::jvmti_thread_state_offset(), G3_scratch); + Address cond_addr(G3_scratch, JvmtiThreadState::earlyret_state_offset()); + + // Clear the earlyret state + __ stw(G0 /* JvmtiThreadState::earlyret_inactive */, cond_addr); + + __ remove_activation(state, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false); + + // The caller's SP was adjusted upon method entry to accommodate + // the callee's non-argument locals. Undo that adjustment. + __ ret(); // return to caller + __ delayed()->restore(I5_savedSP, G0, SP); + + return entry; +} // end of JVMTI ForceEarlyReturn support + + +//------------------------------------------------------------------------------------------------------------------------ +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + aep = __ pc(); __ push_ptr(); __ ba_short(L); + fep = __ pc(); __ push_f(); __ ba_short(L); + dep = __ pc(); __ push_d(); __ ba_short(L); + lep = __ pc(); __ push_l(); __ ba_short(L); + iep = __ pc(); __ push_i(); + bep = cep = sep = iep; // there aren't any + vep = __ pc(); __ bind(L); // fall through + generate_and_dispatch(t); +} + +// -------------------------------------------------------------------------------- + +// Non-product 
code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + __ push(state); + __ mov(O7, Lscratch); // protect return address within interpreter + + // Pass a 0 (not used in sparc) and the top of stack to the bytecode tracer + __ mov( Otos_l2, G3_scratch ); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), G0, Otos_l1, G3_scratch); + __ mov(Lscratch, O7); // restore return address + __ pop(state); + __ retl(); + __ delayed()->nop(); + + return entry; +} + + +// helpers for generate_and_dispatch + +void TemplateInterpreterGenerator::count_bytecode() { + __ inc_counter(&BytecodeCounter::_counter_value, G3_scratch, G4_scratch); +} + + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + __ inc_counter(&BytecodeHistogram::_counters[t->bytecode()], G3_scratch, G4_scratch); +} + + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + AddressLiteral index (&BytecodePairHistogram::_index); + AddressLiteral counters((address) &BytecodePairHistogram::_counters); + + // get index, shift out old bytecode, bring in new bytecode, and store it + // _index = (_index >> log2_number_of_codes) | + // (bytecode << log2_number_of_codes); + + __ load_contents(index, G4_scratch); + __ srl( G4_scratch, BytecodePairHistogram::log2_number_of_codes, G4_scratch ); + __ set( ((int)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes, G3_scratch ); + __ or3( G3_scratch, G4_scratch, G4_scratch ); + __ store_contents(G4_scratch, index, G3_scratch); + + // bump bucket contents + // _counters[_index] ++; + + __ set(counters, G3_scratch); // loads into G3_scratch + __ sll( G4_scratch, LogBytesPerWord, G4_scratch ); // Index is word address + __ add (G3_scratch, G4_scratch, G3_scratch); // Add in index + __ ld (G3_scratch, 0, G4_scratch); + __ inc (G4_scratch); + __ st (G4_scratch, 0, G3_scratch); +} + + +void 
TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + address entry = Interpreter::trace_code(t->tos_in()); + guarantee(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + __ delayed()->nop(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + AddressLiteral counter(&BytecodeCounter::_counter_value); + __ load_contents(counter, G3_scratch); + AddressLiteral stop_at(&StopInterpreterAt); + __ load_ptr_contents(stop_at, G4_scratch); + __ cmp(G3_scratch, G4_scratch); + __ breakpoint_trap(Assembler::equal, Assembler::icc); +} +#endif // not PRODUCT diff -ur --new-file a/src/hotspot/cpu/sparc/templateTable_sparc.cpp b/src/hotspot/cpu/sparc/templateTable_sparc.cpp --- a/src/hotspot/cpu/sparc/templateTable_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/templateTable_sparc.cpp 2023-04-16 11:42:11.078794419 +0000 @@ -0,0 +1,3833 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/tlab_globals.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + +#define __ _masm-> + +// Misc helpers + +// Do an oop store like *(base + index + offset) = val +// index can be noreg, +static void do_oop_store(InterpreterMacroAssembler* _masm, + Register base, + Register index, + int offset, + Register val, + Register tmp, + DecoratorSet decorators = 0) { + assert(tmp != val && tmp != base && tmp != index, "register collision"); + assert(index == noreg || offset == 0, "only one offset"); + if (index == noreg) { + __ store_heap_oop(val, base, offset, tmp, decorators); + } else { + __ store_heap_oop(val, base, index, tmp, decorators); + } +} + +// Do an oop load like val = *(base + index + offset) +// index can be noreg. 
+static void do_oop_load(InterpreterMacroAssembler* _masm, + Register base, + Register index, + int offset, + Register dst, + Register tmp, + DecoratorSet decorators = 0) { + assert(tmp != dst && tmp != base && tmp != index, "register collision"); + assert(index == noreg || offset == 0, "only one offset"); + if (index == noreg) { + __ load_heap_oop(base, offset, dst, tmp, decorators); + } else { + __ load_heap_oop(base, index, dst, tmp, decorators); + } +} + + +//---------------------------------------------------------------------------------------------------- +// Condition conversion +Assembler::Condition ccNot(TemplateTable::Condition cc) { + switch (cc) { + case TemplateTable::equal : return Assembler::notEqual; + case TemplateTable::not_equal : return Assembler::equal; + case TemplateTable::less : return Assembler::greaterEqual; + case TemplateTable::less_equal : return Assembler::greater; + case TemplateTable::greater : return Assembler::lessEqual; + case TemplateTable::greater_equal: return Assembler::less; + } + ShouldNotReachHere(); + return Assembler::zero; +} + +//---------------------------------------------------------------------------------------------------- +// Miscellaneous helper routines + + +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(Lbcp, offset); +} + + +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register temp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) { + // With sharing on, may need to test Method* flag. 
+ if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(bc_reg, temp_reg, temp_reg, byte_no, 1); + __ set(bc, bc_reg); + __ cmp_and_br_short(temp_reg, 0, Assembler::equal, Assembler::pn, L_patch_done); // don't patch + } + break; + default: + assert(byte_no == -1, "sanity"); + if (load_bc_into_bc_reg) { + __ set(bc, bc_reg); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + __ ldub(at_bcp(0), temp_reg); + __ cmp_and_br_short(temp_reg, Bytecodes::_breakpoint, Assembler::notEqual, Assembler::pt, L_fast_patch); + // perform the quickening, slowly, in the bowels of the breakpoint table + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), Lmethod, Lbcp, bc_reg); + __ ba_short(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Bytecodes::Code orig_bytecode = Bytecodes::java_code(bc); + Label L_okay; + __ ldub(at_bcp(0), temp_reg); + __ cmp(temp_reg, orig_bytecode); + __ br(Assembler::equal, false, Assembler::pt, L_okay); + __ delayed()->cmp(temp_reg, bc_reg); + __ br(Assembler::equal, false, Assembler::pt, L_okay); + __ delayed()->nop(); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + 
// patch bytecode + __ stb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + +//---------------------------------------------------------------------------------------------------- +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ clr(Otos_i); +} + + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + __ set(value, Otos_i); +} + + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + assert(value >= 0, "check this code"); + __ set(value, Otos_l); +} + + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + static float zero = 0.0, one = 1.0, two = 2.0; + float* p; + switch( value ) { + default: ShouldNotReachHere(); + case 0: p = &zero; break; + case 1: p = &one; break; + case 2: p = &two; break; + } + AddressLiteral a(p); + __ sethi(a, G3_scratch); + __ ldf(FloatRegisterImpl::S, G3_scratch, a.low10(), Ftos_f); +} + + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + static double zero = 0.0, one = 1.0; + double* p; + switch( value ) { + default: ShouldNotReachHere(); + case 0: p = &zero; break; + case 1: p = &one; break; + } + AddressLiteral a(p); + __ sethi(a, G3_scratch); + __ ldf(FloatRegisterImpl::D, G3_scratch, a.low10(), Ftos_d); +} + + +// %%%%% Should factor most snippet templates across platforms + +void TemplateTable::bipush() { + transition(vtos, itos); + __ ldsb( at_bcp(1), Otos_i ); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ get_2_byte_integer_at_bcp(1, G3_scratch, Otos_i, InterpreterMacroAssembler::Signed); +} + +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notInt, isString, notString, notClass, notFloat, exit; + + if (wide) { + __ get_2_byte_integer_at_bcp(1, G3_scratch, O1,
InterpreterMacroAssembler::Unsigned); + } else { + __ ldub(Lbcp, 1, O1); + } + __ get_cpool_and_tags(O0, O2); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type from tags + __ add(O2, tags_offset, O2); + __ ldub(O2, O1, O2); + + // unresolved class? If so, must resolve + __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedClass, Assembler::equal, Assembler::pt, call_ldc); + + // unresolved class in error state + __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedClassInError, Assembler::equal, Assembler::pn, call_ldc); + + __ cmp(O2, JVM_CONSTANT_Class); // need to call vm to get java mirror of the class + __ brx(Assembler::notEqual, true, Assembler::pt, notClass); + __ delayed()->add(O0, base_offset, O0); + + __ bind(call_ldc); + __ set(wide, O1); + call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), O1); + __ push(atos); + __ ba(exit); + __ delayed()->nop(); + + __ bind(notClass); + // __ add(O0, base_offset, O0); + __ sll(O1, LogBytesPerWord, O1); + __ cmp(O2, JVM_CONSTANT_Integer); + __ brx(Assembler::notEqual, true, Assembler::pt, notInt); + __ delayed()->cmp(O2, JVM_CONSTANT_String); + __ ld(O0, O1, Otos_i); + __ push(itos); + __ ba(exit); + __ delayed()->nop(); + + __ bind(notInt); + // __ cmp(O2, JVM_CONSTANT_String); + __ brx(Assembler::notEqual, true, Assembler::pt, notString); + __ delayed()->cmp(O2, JVM_CONSTANT_Float); + __ bind(isString); + __ stop("string should be rewritten to fast_aldc"); + __ ba(exit); + __ delayed()->nop(); + + __ bind(notString); + //__ cmp(O2, JVM_CONSTANT_Float); + __ brx(Assembler::notEqual, true, Assembler::pt, notFloat); + __ delayed()->nop(); + __ ldf(FloatRegisterImpl::S, O0, O1, Ftos_f); + __ push(ftos); + __ ba(exit); + __ delayed()->nop(); + + // assume the tag is for condy; if not, the VM runtime will tell us + __ bind(notFloat); + condy_helper(exit); + + __ bind(exit); +} + +// Fast path for caching oop constants. 
+// %%% We should use this to handle Class and String constants also. +// %%% It will simplify the ldc/primitive path considerably. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + int index_size = wide ? sizeof(u2) : sizeof(u1); + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (CallSite, etc.) + assert_different_registers(Otos_i, G3_scratch); + __ get_cache_index_at_bcp(Otos_i, G3_scratch, 1, index_size); // load index => G3_scratch + __ load_resolved_reference_at_index(Otos_i, G3_scratch, Lscratch); + __ tst(Otos_i); + __ br(Assembler::notEqual, false, Assembler::pt, resolved); + __ delayed()->set((int)bytecode(), O1); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + // first time invocation - must resolve first + __ call_VM(Otos_i, entry, O1); + __ bind(resolved); + + { // Check for the null sentinel. + // If we just called the VM, it already did the mapping for us, + // but it's harmless to retry. 
+ Label notNull; + __ set(ExternalAddress((address)Universe::the_null_sentinel_addr()), G3_scratch); + __ ld_ptr(G3_scratch, 0, G3_scratch); + __ cmp(G3_scratch, Otos_i); + __ br(Assembler::notEqual, true, Assembler::pt, notNull); + __ delayed()->nop(); + __ clr(Otos_i); // NULL object reference + __ bind(notNull); + } + + // Safe to call with 0 result + __ verify_oop(Otos_i); +} + +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label notDouble, notLong, exit; + + __ get_2_byte_integer_at_bcp(1, G3_scratch, O1, InterpreterMacroAssembler::Unsigned); + __ get_cpool_and_tags(O0, O2); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + // get type from tags + __ add(O2, tags_offset, O2); + __ ldub(O2, O1, O2); + + __ sll(O1, LogBytesPerWord, O1); + __ add(O0, O1, G3_scratch); + + __ cmp_and_brx_short(O2, JVM_CONSTANT_Double, Assembler::notEqual, Assembler::pt, notDouble); + // A double can be placed at word-aligned locations in the constant pool. + // Check out Conversions.java for an example. + // Also ConstantPool::header_size() is 20, which makes it very difficult + // to double-align double on the constant pool. 
SG, 11/7/97 + __ ldf(FloatRegisterImpl::D, G3_scratch, base_offset, Ftos_d); + __ push(dtos); + __ ba_short(exit); + + __ bind(notDouble); + __ cmp_and_brx_short(O2, JVM_CONSTANT_Long, Assembler::notEqual, Assembler::pt, notLong); + __ ldx(G3_scratch, base_offset, Otos_l); + __ push(ltos); + __ ba_short(exit); + + __ bind(notLong); + condy_helper(exit); + + __ bind(exit); +} + +void TemplateTable::condy_helper(Label& exit) { + Register Robj = Otos_i; + Register Roffset = G4_scratch; + Register Rflags = G1_scratch; + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + __ set((int)bytecode(), O1); + __ call_VM(Robj, entry, O1); + + // Get vm_result_2 has flags = (tos, off) using format CPCE::_flags + __ get_vm_result_2(G3_scratch); + + // Get offset + __ set((int)ConstantPoolCacheEntry::field_index_mask, Roffset); + __ and3(G3_scratch, Roffset, Roffset); + + // compute type + __ srl(G3_scratch, ConstantPoolCacheEntry::tos_state_shift, Rflags); + // Make sure we don't need to mask Rflags after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + + switch (bytecode()) { + case Bytecodes::_ldc: + case Bytecodes::_ldc_w: + { + // tos in (itos, ftos, stos, btos, ctos, ztos) + Label notInt, notFloat, notShort, notByte, notChar, notBool; + __ cmp(Rflags, itos); + __ br(Assembler::notEqual, false, Assembler::pt, notInt); + __ delayed()->cmp(Rflags, ftos); + // itos + __ ld(Robj, Roffset, Otos_i); + __ push(itos); + __ ba_short(exit); + + __ bind(notInt); + __ br(Assembler::notEqual, false, Assembler::pt, notFloat); + __ delayed()->cmp(Rflags, stos); + // ftos + __ ldf(FloatRegisterImpl::S, Robj, Roffset, Ftos_f); + __ push(ftos); + __ ba_short(exit); + + __ bind(notFloat); + __ br(Assembler::notEqual, false, Assembler::pt, notShort); + __ delayed()->cmp(Rflags, btos); + // stos + __ ldsh(Robj, Roffset, Otos_i); + __ push(itos); + __ ba_short(exit); + + __ bind(notShort); + __ br(Assembler::notEqual, false, Assembler::pt, 
notByte); + __ delayed()->cmp(Rflags, ctos); + // btos + __ ldsb(Robj, Roffset, Otos_i); + __ push(itos); + __ ba_short(exit); + + __ bind(notByte); + __ br(Assembler::notEqual, false, Assembler::pt, notChar); + __ delayed()->cmp(Rflags, ztos); + // ctos + __ lduh(Robj, Roffset, Otos_i); + __ push(itos); + __ ba_short(exit); + + __ bind(notChar); + __ br(Assembler::notEqual, false, Assembler::pt, notBool); + __ delayed()->nop(); + // ztos + __ ldsb(Robj, Roffset, Otos_i); + __ push(itos); + __ ba_short(exit); + + __ bind(notBool); + break; + } + + case Bytecodes::_ldc2_w: + { + Label notLong, notDouble; + __ cmp(Rflags, ltos); + __ br(Assembler::notEqual, false, Assembler::pt, notLong); + __ delayed()->cmp(Rflags, dtos); + // ltos + // load must be atomic + __ ld_long(Robj, Roffset, Otos_l); + __ push(ltos); + __ ba_short(exit); + + __ bind(notLong); + __ br(Assembler::notEqual, false, Assembler::pt, notDouble); + __ delayed()->nop(); + // dtos + __ ldf(FloatRegisterImpl::D, Robj, Roffset, Ftos_d); + __ push(dtos); + __ ba_short(exit); + + __ bind(notDouble); + break; + } + + default: + ShouldNotReachHere(); + } + + __ stop("bad ldc/condy"); + + __ bind(exit); +} + +void TemplateTable::locals_index(Register reg, int offset) { + __ ldub( at_bcp(offset), reg ); +} + +void TemplateTable::locals_index_wide(Register reg) { + // offset is 2, not 1, because Lbcp points to wide prefix code + __ get_2_byte_integer_at_bcp(2, G4_scratch, reg, InterpreterMacroAssembler::Unsigned); +} + +void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void TemplateTable::iload_internal(RewriteControl rc) { + transition(vtos, itos); + // Rewrite iload,iload pair into fast_iload2 + // iload,caload pair into fast_icaload + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + + // get next byte + __ ldub(at_bcp(Bytecodes::length_for(Bytecodes::_iload)), G3_scratch); + + // if _iload, wait to 
rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ cmp_and_br_short(G3_scratch, (int)Bytecodes::_iload, Assembler::equal, Assembler::pn, done); + + __ cmp(G3_scratch, (int)Bytecodes::_fast_iload); + __ br(Assembler::equal, false, Assembler::pn, rewrite); + __ delayed()->set(Bytecodes::_fast_iload2, G4_scratch); + + __ cmp(G3_scratch, (int)Bytecodes::_caload); + __ br(Assembler::equal, false, Assembler::pn, rewrite); + __ delayed()->set(Bytecodes::_fast_icaload, G4_scratch); + + __ set(Bytecodes::_fast_iload, G4_scratch); // don't check again + // rewrite + // G4_scratch: fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, G4_scratch, G3_scratch, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(G3_scratch); + __ access_local_int( G3_scratch, Otos_i ); +} + +void TemplateTable::fast_iload2() { + transition(vtos, itos); + locals_index(G3_scratch); + __ access_local_int( G3_scratch, Otos_i ); + __ push_i(); + locals_index(G3_scratch, 3); // get next bytecode's local index. 
+ __ access_local_int( G3_scratch, Otos_i ); +} + +void TemplateTable::fast_iload() { + transition(vtos, itos); + locals_index(G3_scratch); + __ access_local_int( G3_scratch, Otos_i ); +} + +void TemplateTable::lload() { + transition(vtos, ltos); + locals_index(G3_scratch); + __ access_local_long( G3_scratch, Otos_l ); +} + + +void TemplateTable::fload() { + transition(vtos, ftos); + locals_index(G3_scratch); + __ access_local_float( G3_scratch, Ftos_f ); +} + + +void TemplateTable::dload() { + transition(vtos, dtos); + locals_index(G3_scratch); + __ access_local_double( G3_scratch, Ftos_d ); +} + + +void TemplateTable::aload() { + transition(vtos, atos); + locals_index(G3_scratch); + __ access_local_ptr( G3_scratch, Otos_i); +} + + +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(G3_scratch); + __ access_local_int( G3_scratch, Otos_i ); +} + + +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(G3_scratch); + __ access_local_long( G3_scratch, Otos_l ); +} + + +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(G3_scratch); + __ access_local_float( G3_scratch, Ftos_f ); +} + + +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(G3_scratch); + __ access_local_double( G3_scratch, Ftos_d ); +} + + +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(G3_scratch); + __ access_local_ptr( G3_scratch, Otos_i ); + __ verify_oop(Otos_i); +} + + +void TemplateTable::iaload() { + transition(itos, itos); + // Otos_i: index + // tos: array + __ index_check(O2, Otos_i, LogBytesPerInt, G3_scratch, O3); + __ ld(O3, arrayOopDesc::base_offset_in_bytes(T_INT), Otos_i); +} + + +void TemplateTable::laload() { + transition(itos, ltos); + // Otos_i: index + // O2: array + __ index_check(O2, Otos_i, LogBytesPerLong, G3_scratch, O3); + __ ld_long(O3, arrayOopDesc::base_offset_in_bytes(T_LONG), Otos_l); +} + + +void TemplateTable::faload() { 
+ transition(itos, ftos); + // Otos_i: index + // O2: array + __ index_check(O2, Otos_i, LogBytesPerInt, G3_scratch, O3); + __ ldf(FloatRegisterImpl::S, O3, arrayOopDesc::base_offset_in_bytes(T_FLOAT), Ftos_f); +} + + +void TemplateTable::daload() { + transition(itos, dtos); + // Otos_i: index + // O2: array + __ index_check(O2, Otos_i, LogBytesPerLong, G3_scratch, O3); + __ ldf(FloatRegisterImpl::D, O3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE), Ftos_d); +} + + +void TemplateTable::aaload() { + transition(itos, atos); + // Otos_i: index + // tos: array + __ index_check(O2, Otos_i, UseCompressedOops ? 2 : LogBytesPerWord, G3_scratch, O3); + do_oop_load(_masm, + O3, + noreg, + arrayOopDesc::base_offset_in_bytes(T_OBJECT), + Otos_i, + G3_scratch, + IS_ARRAY); + __ verify_oop(Otos_i); +} + + +void TemplateTable::baload() { + transition(itos, itos); + // Otos_i: index + // tos: array + __ index_check(O2, Otos_i, 0, G3_scratch, O3); + __ ldsb(O3, arrayOopDesc::base_offset_in_bytes(T_BYTE), Otos_i); +} + + +void TemplateTable::caload() { + transition(itos, itos); + // Otos_i: index + // tos: array + __ index_check(O2, Otos_i, LogBytesPerShort, G3_scratch, O3); + __ lduh(O3, arrayOopDesc::base_offset_in_bytes(T_CHAR), Otos_i); +} + +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // Otos_i: index + // tos: array + locals_index(G3_scratch); + __ access_local_int( G3_scratch, Otos_i ); + __ index_check(O2, Otos_i, LogBytesPerShort, G3_scratch, O3); + __ lduh(O3, arrayOopDesc::base_offset_in_bytes(T_CHAR), Otos_i); +} + + +void TemplateTable::saload() { + transition(itos, itos); + // Otos_i: index + // tos: array + __ index_check(O2, Otos_i, LogBytesPerShort, G3_scratch, O3); + __ ldsh(O3, arrayOopDesc::base_offset_in_bytes(T_SHORT), Otos_i); +} + + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ ld( Llocals, Interpreter::local_offset_in_bytes(n), Otos_i ); +} + + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + 
assert(n+1 < Argument::n_register_parameters, "would need more code"); + __ load_unaligned_long(Llocals, Interpreter::local_offset_in_bytes(n+1), Otos_l); +} + + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + assert(n < Argument::n_register_parameters, "would need more code"); + __ ldf( FloatRegisterImpl::S, Llocals, Interpreter::local_offset_in_bytes(n), Ftos_f ); +} + + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + FloatRegister dst = Ftos_d; + __ load_unaligned_double(Llocals, Interpreter::local_offset_in_bytes(n+1), dst); +} + + +void TemplateTable::aload(int n) { + transition(vtos, atos); + __ ld_ptr( Llocals, Interpreter::local_offset_in_bytes(n), Otos_i ); +} + +void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +void TemplateTable::aload_0_internal(RewriteControl rc) { + transition(vtos, atos); + + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield (itos) + // _aload_0, _fast_agetfield (atos) + // _aload_0, _fast_fgetfield (ftos) + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0 + // bytecode checks the next bytecode and then rewrites the current + // bytecode into a pair bytecode; otherwise it rewrites the current + // bytecode into _fast_aload_0 that doesn't do the pair check anymore. 
+ // + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + + // get next byte + __ ldub(at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)), G3_scratch); + + // if _getfield then wait with rewrite + __ cmp_and_br_short(G3_scratch, (int)Bytecodes::_getfield, Assembler::equal, Assembler::pn, done); + + // if _fast_igetfield then rewrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "adjust fast bytecode def"); + __ cmp(G3_scratch, (int)Bytecodes::_fast_igetfield); + __ br(Assembler::equal, false, Assembler::pn, rewrite); + __ delayed()->set(Bytecodes::_fast_iaccess_0, G4_scratch); + + // if _fast_agetfield then rewrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "adjust fast bytecode def"); + __ cmp(G3_scratch, (int)Bytecodes::_fast_agetfield); + __ br(Assembler::equal, false, Assembler::pn, rewrite); + __ delayed()->set(Bytecodes::_fast_aaccess_0, G4_scratch); + + // if _fast_fgetfield then rewrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "adjust fast bytecode def"); + __ cmp(G3_scratch, (int)Bytecodes::_fast_fgetfield); + __ br(Assembler::equal, false, Assembler::pn, rewrite); + __ delayed()->set(Bytecodes::_fast_faccess_0, G4_scratch); + + // else rewrite to _fast_aload_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "adjust fast bytecode def"); + __ set(Bytecodes::_fast_aload_0, G4_scratch); + + // rewrite + // G4_scratch: fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, G4_scratch, G3_scratch, false); + __ bind(done); + } + + // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
+ aload(0); +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(G3_scratch); + __ store_local_int( G3_scratch, Otos_i ); +} + + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(G3_scratch); + __ store_local_long( G3_scratch, Otos_l ); +} + + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(G3_scratch); + __ store_local_float( G3_scratch, Ftos_f ); +} + + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(G3_scratch); + __ store_local_double( G3_scratch, Ftos_d ); +} + + +void TemplateTable::astore() { + transition(vtos, vtos); + __ load_ptr(0, Otos_i); + __ inc(Lesp, Interpreter::stackElementSize); + __ verify_oop_or_return_address(Otos_i, G3_scratch); + locals_index(G3_scratch); + __ store_local_ptr(G3_scratch, Otos_i); +} + + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(); + locals_index_wide(G3_scratch); + __ store_local_int( G3_scratch, Otos_i ); +} + + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(); + locals_index_wide(G3_scratch); + __ store_local_long( G3_scratch, Otos_l ); +} + + +void TemplateTable::wide_fstore() { + transition(vtos, vtos); + __ pop_f(); + locals_index_wide(G3_scratch); + __ store_local_float( G3_scratch, Ftos_f ); +} + + +void TemplateTable::wide_dstore() { + transition(vtos, vtos); + __ pop_d(); + locals_index_wide(G3_scratch); + __ store_local_double( G3_scratch, Ftos_d ); +} + + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ load_ptr(0, Otos_i); + __ inc(Lesp, Interpreter::stackElementSize); + __ verify_oop_or_return_address(Otos_i, G3_scratch); + locals_index_wide(G3_scratch); + __ store_local_ptr(G3_scratch, Otos_i); +} + + +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(O2); // index + // Otos_i: val + // O3: array + __ index_check(O3, O2, LogBytesPerInt, G3_scratch, O2); // the result register (O2) is the base register of the store below + __ st(Otos_i, O2, arrayOopDesc::base_offset_in_bytes(T_INT)); +} + 
+ +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i(O2); // index + // Otos_l: val + // O3: array + __ index_check(O3, O2, LogBytesPerLong, G3_scratch, O2); + __ st_long(Otos_l, O2, arrayOopDesc::base_offset_in_bytes(T_LONG)); +} + + +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(O2); // index + // Ftos_f: val + // O3: array + __ index_check(O3, O2, LogBytesPerInt, G3_scratch, O2); + __ stf(FloatRegisterImpl::S, Ftos_f, O2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); +} + + +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i(O2); // index + // Ftos_d: val + // O3: array + __ index_check(O3, O2, LogBytesPerLong, G3_scratch, O2); + __ stf(FloatRegisterImpl::D, Ftos_d, O2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); +} + + +void TemplateTable::aastore() { + Label store_ok, is_null, done; + transition(vtos, vtos); + __ ld_ptr(Lesp, Interpreter::expr_offset_in_bytes(0), Otos_i); + __ ld(Lesp, Interpreter::expr_offset_in_bytes(1), O2); // get index + __ ld_ptr(Lesp, Interpreter::expr_offset_in_bytes(2), O3); // get array + // Otos_i: val + // O2: index + // O3: array + __ verify_oop(Otos_i); + __ index_check_without_pop(O3, O2, UseCompressedOops ? 2 : LogBytesPerWord, G3_scratch, O1); + + // do array store check - check for NULL value first + __ br_null_short( Otos_i, Assembler::pn, is_null ); + + __ load_klass(O3, O4); // get array klass + __ load_klass(Otos_i, O5); // get value klass + + // do fast instanceof cache test + + __ ld_ptr(O4, in_bytes(ObjArrayKlass::element_klass_offset()), O4); + + assert(Otos_i == O0, "just checking"); + + // Otos_i: value + // O1: addr - offset + // O2: index + // O3: array + // O4: array element klass + // O5: value klass + + // Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Generate a fast subtype check. Branch to store_ok if no + // failure. Throw if failure. 
+ __ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok ); + + // Not a subtype; so must throw exception (the condition passed is Assembler::never, so presumably the throw is unconditional - confirm against throw_if_not_x) + __ throw_if_not_x( Assembler::never, Interpreter::_throw_ArrayStoreException_entry, G3_scratch ); + + // Store is OK. + __ bind(store_ok); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, IS_ARRAY); + + __ ba(done); + __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize); // adj sp (pops array, index and value) + + __ bind(is_null); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, IS_ARRAY); + + __ profile_null_seen(G3_scratch); + __ inc(Lesp, 3* Interpreter::stackElementSize); // adj sp (pops array, index and value) + __ bind(done); +} + + +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(O2); // index + // Otos_i: val + // O2: index + // O3: array + __ index_check(O3, O2, 0, G3_scratch, O2); + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
+ __ load_klass(O3, G4_scratch); + __ ld(G4_scratch, in_bytes(Klass::layout_helper_offset()), G4_scratch); + __ set(Klass::layout_helper_boolean_diffbit(), G3_scratch); + __ andcc(G3_scratch, G4_scratch, G0); + Label L_skip; + __ br(Assembler::zero, false, Assembler::pn, L_skip); + __ delayed()->nop(); + __ and3(Otos_i, 1, Otos_i); // if it is a T_BOOLEAN array, mask the stored value to 0/1 + __ bind(L_skip); + __ stb(Otos_i, O2, arrayOopDesc::base_offset_in_bytes(T_BYTE)); +} + + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(O2); // index + // Otos_i: val + // O3: array + __ index_check(O3, O2, LogBytesPerShort, G3_scratch, O2); + __ sth(Otos_i, O2, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + + +void TemplateTable::sastore() { + // %%%%% Factor across platform (sastore reuses the castore template unchanged) + castore(); +} + + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ st(Otos_i, Llocals, Interpreter::local_offset_in_bytes(n)); +} + + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + assert(n+1 < Argument::n_register_parameters, "only handle register cases"); + __ store_unaligned_long(Otos_l, Llocals, Interpreter::local_offset_in_bytes(n+1)); + +} + + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + assert(n < Argument::n_register_parameters, "only handle register cases"); + __ stf(FloatRegisterImpl::S, Ftos_f, Llocals, Interpreter::local_offset_in_bytes(n)); +} + + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + FloatRegister src = Ftos_d; + __ store_unaligned_double(src, Llocals, Interpreter::local_offset_in_bytes(n+1)); +} + + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ load_ptr(0, Otos_i); + __ inc(Lesp, Interpreter::stackElementSize); + __ verify_oop_or_return_address(Otos_i, G3_scratch); + __ store_local_ptr(n, Otos_i); +} + + +void TemplateTable::pop() { + transition(vtos, vtos); + __ inc(Lesp, Interpreter::stackElementSize); +} + + +void TemplateTable::pop2() { + 
transition(vtos, vtos); + __ inc(Lesp, 2 * Interpreter::stackElementSize); +} + + +void TemplateTable::dup() { + transition(vtos, vtos); + // stack: ..., a + // load a + __ load_ptr(0, Otos_i); + __ push_ptr(Otos_i); + // stack: ..., a, a +} + + +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr( 1, G3_scratch); // get a + __ load_ptr( 0, Otos_l1); // get b + __ store_ptr(1, Otos_l1); // put b + __ store_ptr(0, G3_scratch); // put a - like swap + __ push_ptr(Otos_l1); // push b + // stack: ..., b, a, b +} + + +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + // get c and push on stack, reuse registers + __ load_ptr( 0, G3_scratch); // get c + __ push_ptr(G3_scratch); // push c + // stack: ..., a, b, c, c (c in reg) (Lesp - 4) + // (stack offsets n+1 now) + __ load_ptr( 3, Otos_l1); // get a + __ store_ptr(3, G3_scratch); // put c at 3 + // stack: ..., c, b, c, c (a in reg) + __ load_ptr( 2, G3_scratch); // get b + __ store_ptr(2, Otos_l1); // put a at 2 + // stack: ..., c, a, c, c (b in reg) + __ store_ptr(1, G3_scratch); // put b at 1 + // stack: ..., c, a, b, c +} + + +void TemplateTable::dup2() { + transition(vtos, vtos); + __ load_ptr(1, G3_scratch); // get a + __ load_ptr(0, Otos_l1); // get b + __ push_ptr(G3_scratch); // push a + __ push_ptr(Otos_l1); // push b + // stack: ..., a, b, a, b +} + + +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr( 1, Lscratch); // get b + __ load_ptr( 2, Otos_l1); // get a + __ store_ptr(2, Lscratch); // put b at a + // stack: ..., b, b, c + __ load_ptr( 0, G3_scratch); // get c + __ store_ptr(1, G3_scratch); // put c at b + // stack: ..., b, c, c + __ store_ptr(0, Otos_l1); // put a at c + // stack: ..., b, c, a + __ push_ptr(Lscratch); // push b + __ push_ptr(G3_scratch); // push c + // stack: ..., b, c, a, b, c +} + + +// The spec says that these types can be a mixture of category 1 
(1 word) +// types and/or category 2 types (long and doubles) +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + __ load_ptr( 1, Lscratch); // get c + __ load_ptr( 3, Otos_l1); // get a + __ store_ptr(3, Lscratch); // put c at 3 + __ store_ptr(1, Otos_l1); // put a at 1 + // stack: ..., c, b, a, d + __ load_ptr( 2, G3_scratch); // get b + __ load_ptr( 0, Otos_l1); // get d + __ store_ptr(0, G3_scratch); // put b at 0 + __ store_ptr(2, Otos_l1); // put d at 2 + // stack: ..., c, d, a, b + __ push_ptr(Lscratch); // push c + __ push_ptr(Otos_l1); // push d + // stack: ..., c, d, a, b, c, d +} + + +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr( 1, G3_scratch); // get a + __ load_ptr( 0, Otos_l1); // get b + __ store_ptr(0, G3_scratch); // put a at 0 + __ store_ptr(1, Otos_l1); // put b at 1 + // stack: ..., b, a +} + + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + __ pop_i(O1); + switch (op) { + case add: __ add(O1, Otos_i, Otos_i); break; + case sub: __ sub(O1, Otos_i, Otos_i); break; + // %%%%% Mul may not exist: better to call .mul? 
+ case mul: __ smul(O1, Otos_i, Otos_i); break; + case _and: __ and3(O1, Otos_i, Otos_i); break; + case _or: __ or3(O1, Otos_i, Otos_i); break; + case _xor: __ xor3(O1, Otos_i, Otos_i); break; + case shl: __ sll(O1, Otos_i, Otos_i); break; + case shr: __ sra(O1, Otos_i, Otos_i); break; + case ushr: __ srl(O1, Otos_i, Otos_i); break; + default: ShouldNotReachHere(); + } +} + + +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(O2); + switch (op) { + case add: __ add(O2, Otos_l, Otos_l); break; + case sub: __ sub(O2, Otos_l, Otos_l); break; + case _and: __ and3(O2, Otos_l, Otos_l); break; + case _or: __ or3(O2, Otos_l, Otos_l); break; + case _xor: __ xor3(O2, Otos_l, Otos_l); break; + default: ShouldNotReachHere(); + } +} + + +void TemplateTable::idiv() { + // %%%%% Later: For SPARC/V7 call .sdiv library routine, + // %%%%% Use ldsw...sdivx on pure V9 ABI. 64 bit safe. + + transition(itos, itos); + __ pop_i(O1); // get 1st op + + // Y supplies the upper 32 bits of the dividend, set it to 0 or all ones (by the sign of O1) + __ wry(G0); + __ mov(~0, G3_scratch); + + __ tst(O1); + Label neg; + __ br(Assembler::negative, true, Assembler::pn, neg); + __ delayed()->wry(G3_scratch); + __ bind(neg); + + Label ok; + __ tst(Otos_i); + __ throw_if_not_icc( Assembler::notZero, Interpreter::_throw_ArithmeticException_entry, G3_scratch ); + + const int min_int = 0x80000000; + Label regular; + __ cmp(Otos_i, -1); + __ br(Assembler::notEqual, false, Assembler::pt, regular); + // Don't put set in delay slot + // Set will turn into multiple instructions in 64 bit mode + __ delayed()->nop(); + __ set(min_int, G4_scratch); + Label done; + __ cmp(O1, G4_scratch); + __ br(Assembler::equal, true, Assembler::pt, done); + __ delayed()->mov(O1, Otos_i); // (mov only executed if branch taken) + + __ bind(regular); + __ sdiv(O1, Otos_i, Otos_i); // note: irem uses O1 after this instruction! 
+ __ bind(done); +} + + +void TemplateTable::irem() { + transition(itos, itos); + __ mov(Otos_i, O2); // save divisor + idiv(); // %%%% Hack: exploits fact that idiv leaves dividend in O1 + __ smul(Otos_i, O2, Otos_i); + __ sub(O1, Otos_i, Otos_i); +} + + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(O2); + __ mulx(Otos_l, O2, Otos_l); + +} + + +void TemplateTable::ldiv() { + transition(ltos, ltos); + + // check for zero + __ pop_l(O2); + __ tst(Otos_l); + __ throw_if_not_xcc( Assembler::notZero, Interpreter::_throw_ArithmeticException_entry, G3_scratch); + __ sdivx(O2, Otos_l, Otos_l); +} + + +void TemplateTable::lrem() { + transition(ltos, ltos); + + // check for zero + __ pop_l(O2); + __ tst(Otos_l); + __ throw_if_not_xcc( Assembler::notZero, Interpreter::_throw_ArithmeticException_entry, G3_scratch); + __ sdivx(O2, Otos_l, Otos_l2); + __ mulx (Otos_l2, Otos_l, Otos_l2); + __ sub (O2, Otos_l2, Otos_l); +} + + +void TemplateTable::lshl() { + transition(itos, ltos); // %%%% could optimize, fill delay slot or opt for ultra + + __ pop_l(O2); // value to shift in O2 (shift count is in Otos_i) + __ sllx(O2, Otos_i, Otos_l); +} + + +void TemplateTable::lshr() { + transition(itos, ltos); // %%%% see lshl comment + + __ pop_l(O2); // value to shift in O2 (shift count is in Otos_i) + __ srax(O2, Otos_i, Otos_l); +} + + + +void TemplateTable::lushr() { + transition(itos, ltos); // %%%% see lshl comment + + __ pop_l(O2); // value to shift in O2 (shift count is in Otos_i) + __ srlx(O2, Otos_i, Otos_l); +} + + +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: __ pop_f(F4); __ fadd(FloatRegisterImpl::S, F4, Ftos_f, Ftos_f); break; + case sub: __ pop_f(F4); __ fsub(FloatRegisterImpl::S, F4, Ftos_f, Ftos_f); break; + case mul: __ pop_f(F4); __ fmul(FloatRegisterImpl::S, F4, Ftos_f, Ftos_f); break; + case div: __ pop_f(F4); __ fdiv(FloatRegisterImpl::S, F4, Ftos_f, Ftos_f); break; + case rem: + assert(Ftos_f == F0, "just checking"); + // LP64 calling conventions use F1, F3 for passing 2 
floats + __ pop_f(F1); + __ fmov(FloatRegisterImpl::S, Ftos_f, F3); + __ call_VM_leaf(Lscratch, CAST_FROM_FN_PTR(address, SharedRuntime::frem)); + assert( Ftos_f == F0, "fix this code" ); + break; + + default: ShouldNotReachHere(); + } +} + + +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: __ pop_d(F4); __ fadd(FloatRegisterImpl::D, F4, Ftos_d, Ftos_d); break; + case sub: __ pop_d(F4); __ fsub(FloatRegisterImpl::D, F4, Ftos_d, Ftos_d); break; + case mul: __ pop_d(F4); __ fmul(FloatRegisterImpl::D, F4, Ftos_d, Ftos_d); break; + case div: __ pop_d(F4); __ fdiv(FloatRegisterImpl::D, F4, Ftos_d, Ftos_d); break; + case rem: + // Pass arguments in D0, D2 (tos operand is moved to F2, then the popped operand is loaded into F0) + __ fmov(FloatRegisterImpl::D, Ftos_f, F2 ); + __ pop_d( F0 ); + __ call_VM_leaf(Lscratch, CAST_FROM_FN_PTR(address, SharedRuntime::drem)); + assert( Ftos_d == F0, "fix this code" ); + break; + + default: ShouldNotReachHere(); + } +} + + +void TemplateTable::ineg() { + transition(itos, itos); + __ neg(Otos_i); +} + + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ sub(G0, Otos_l, Otos_l); +} + + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ fneg(FloatRegisterImpl::S, Ftos_f, Ftos_f); +} + + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ fneg(FloatRegisterImpl::D, Ftos_f, Ftos_f); +} + + +void TemplateTable::iinc() { + transition(vtos, vtos); + locals_index(G3_scratch); + __ ldsb(Lbcp, 2, O2); // load constant (signed byte at bcp offset 2) + __ access_local_int(G3_scratch, Otos_i); + __ add(Otos_i, O2, Otos_i); + __ st(Otos_i, G3_scratch, 0); // access_local_int puts E.A. in G3_scratch +} + + +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(G3_scratch); + __ get_2_byte_integer_at_bcp( 4, O2, O3, InterpreterMacroAssembler::Signed); + __ access_local_int(G3_scratch, Otos_i); + __ add(Otos_i, O3, Otos_i); + __ st(Otos_i, G3_scratch, 0); // access_local_int puts E.A. 
in G3_scratch +} + + +void TemplateTable::convert() { +// %%%%% Factor this first part across platforms (ASSERT-only: derives tos_in/tos_out from the bytecode and checks them via transition()) + #ifdef ASSERT + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + #endif + + + // Conversion + Label done; + switch (bytecode()) { + case Bytecodes::_i2l: + // Sign extend the 32 bits + __ sra ( Otos_i, 0, Otos_l ); + break; + + case Bytecodes::_i2f: + __ st(Otos_i, __ d_tmp ); + __ ldf(FloatRegisterImpl::S, __ d_tmp, F0); + __ fitof(FloatRegisterImpl::S, F0, Ftos_f); + break; + + case Bytecodes::_i2d: + __ st(Otos_i, __ d_tmp); + __ ldf(FloatRegisterImpl::S, __ d_tmp, F0); + __ 
fitof(FloatRegisterImpl::D, F0, Ftos_f); + break; + + case Bytecodes::_i2b: + __ sll(Otos_i, 24, Otos_i); + __ sra(Otos_i, 24, Otos_i); + break; + + case Bytecodes::_i2c: + __ sll(Otos_i, 16, Otos_i); + __ srl(Otos_i, 16, Otos_i); + break; + + case Bytecodes::_i2s: + __ sll(Otos_i, 16, Otos_i); + __ sra(Otos_i, 16, Otos_i); + break; + + case Bytecodes::_l2i: + // Sign-extend into the high 32 bits + __ sra(Otos_l, 0, Otos_i); + break; + + case Bytecodes::_l2f: + case Bytecodes::_l2d: + __ st_long(Otos_l, __ d_tmp); + __ ldf(FloatRegisterImpl::D, __ d_tmp, Ftos_d); + + if (bytecode() == Bytecodes::_l2f) { + __ fxtof(FloatRegisterImpl::S, Ftos_d, Ftos_f); + } else { + __ fxtof(FloatRegisterImpl::D, Ftos_d, Ftos_d); + } + break; + + case Bytecodes::_f2i: { + Label isNaN; + // result must be 0 if value is NaN; test by comparing value to itself + __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, Ftos_f, Ftos_f); + __ fb(Assembler::f_unordered, true, Assembler::pn, isNaN); + __ delayed()->clr(Otos_i); // NaN + __ ftoi(FloatRegisterImpl::S, Ftos_f, F30); + __ stf(FloatRegisterImpl::S, F30, __ d_tmp); + __ ld(__ d_tmp, Otos_i); + __ bind(isNaN); + } + break; + + case Bytecodes::_f2l: + // must uncache tos (push_f followed by pop_f(F1) moves the value into F1 for the leaf call) + __ push_f(); + __ pop_f(F1); + __ call_VM_leaf(Lscratch, CAST_FROM_FN_PTR(address, SharedRuntime::f2l)); + break; + + case Bytecodes::_f2d: + __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, Ftos_f, Ftos_f); + break; + + case Bytecodes::_d2i: + case Bytecodes::_d2l: + // must uncache tos + __ push_d(); + // LP64 calling conventions pass first double arg in D0 + __ pop_d( Ftos_d ); + __ call_VM_leaf(Lscratch, + bytecode() == Bytecodes::_d2i + ? 
CAST_FROM_FN_PTR(address, SharedRuntime::d2i) + : CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); + break; + + case Bytecodes::_d2f: + __ ftof( FloatRegisterImpl::D, FloatRegisterImpl::S, Ftos_d, Ftos_f); + break; + + default: ShouldNotReachHere(); + } + __ bind(done); +} + + +void TemplateTable::lcmp() { + transition(ltos, itos); + + __ pop_l(O1); // pop off value 1, value 2 is in O0 + __ lcmp( O1, Otos_l, Otos_i ); +} + + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + + if (is_float) __ pop_f(F2); + else __ pop_d(F2); + + assert(Ftos_f == F0 && Ftos_d == F0, "alias checking:"); + + __ float_cmp( is_float, unordered_result, F2, F0, Otos_i ); +} + +void TemplateTable::branch(bool is_jsr, bool is_wide) { + // Note: on SPARC, we use InterpreterMacroAssembler::if_cmp also. + __ verify_thread(); + + const Register O2_bumped_count = O2; + __ profile_taken_branch(G3_scratch, O2_bumped_count); + + // get (wide) offset to O1_disp + const Register O1_disp = O1; + if (is_wide) __ get_4_byte_integer_at_bcp( 1, G4_scratch, O1_disp, InterpreterMacroAssembler::set_CC); + else __ get_2_byte_integer_at_bcp( 1, G4_scratch, O1_disp, InterpreterMacroAssembler::Signed, InterpreterMacroAssembler::set_CC); + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the + // non-JSR normal-branch stuff occurring below. + if( is_jsr ) { + // compute return address as bci in Otos_i (bci of the jsr plus its length: 5 if wide, else 3) + __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch); + __ sub(Lbcp, G3_scratch, G3_scratch); + __ sub(G3_scratch, in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3), Otos_i); + + // Bump Lbcp to target of JSR + __ add(Lbcp, O1_disp, Lbcp); + // Push returnAddress for "ret" on stack + __ push_ptr(Otos_i); + // And away we go! 
+ __ dispatch_next(vtos, 0, true); + return; + } + + // Normal (non-jsr) branch handling + + // Save the current Lbcp + const Register l_cur_bcp = Lscratch; + __ mov( Lbcp, l_cur_bcp ); + + bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter; + if ( increment_invocation_counter_for_backward_branches ) { + Label Lforward; + // check branch direction + __ br( Assembler::positive, false, Assembler::pn, Lforward ); + // Bump bytecode pointer by displacement (take the branch) + __ delayed()->add( O1_disp, Lbcp, Lbcp ); // add to bc addr + + const Register G3_method_counters = G3_scratch; + __ get_method_counters(Lmethod, G3_method_counters, Lforward); + + Label Lno_mdo, Loverflow; + int increment = InvocationCounter::count_increment; + if (ProfileInterpreter) { + // If no method data exists, go to Lno_mdo. + __ ld_ptr(Lmethod, Method::method_data_offset(), G4_scratch); + __ br_null_short(G4_scratch, Assembler::pn, Lno_mdo); + + // Increment backedge counter in the MDO + Address mdo_backedge_counter(G4_scratch, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + Address mask(G4_scratch, in_bytes(MethodData::backedge_mask_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, G3_scratch, O0, + (UseOnStackReplacement ? Assembler::notZero : Assembler::always), &Lforward); + __ ba_short(Loverflow); + } + + // If there's no MDO, increment counter in MethodCounters* + __ bind(Lno_mdo); + Address backedge_counter(G3_method_counters, + in_bytes(MethodCounters::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + Address mask(G3_method_counters, in_bytes(MethodCounters::backedge_mask_offset())); + __ increment_mask_and_jump(backedge_counter, increment, mask, G4_scratch, O0, + (UseOnStackReplacement ? 
Assembler::notZero : Assembler::always), &Lforward); + __ bind(Loverflow); + + // notify point for loop, pass branch bytecode + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), l_cur_bcp); + + // Was an OSR adapter generated? + // O0 = osr nmethod + __ br_null_short(O0, Assembler::pn, Lforward); + + // Has the nmethod been invalidated already? + __ ldub(O0, nmethod::state_offset(), O2); + __ cmp_and_br_short(O2, nmethod::in_use, Assembler::notEqual, Assembler::pn, Lforward); + + // migrate the interpreter frame off of the stack + + __ mov(G2_thread, L7); + // save nmethod + __ mov(O0, L6); + __ set_last_Java_frame(SP, noreg); + __ call_VM_leaf(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin), L7); + __ reset_last_Java_frame(); + __ mov(L7, G2_thread); + + // move OSR nmethod to I1 + __ mov(L6, I1); + + // OSR buffer to I0 + __ mov(O0, I0); + + // remove the interpreter frame + __ restore(I5_savedSP, 0, SP); + + // Jump to the osr code. NOTE(review): O1 here is presumably the pre-restore I1 (the nmethod moved above) - confirm + __ ld_ptr(O1, nmethod::osr_entry_point_offset(), O2); + __ jmp(O2, G0); + __ delayed()->nop(); + + __ bind(Lforward); + } else + // Bump bytecode pointer by displacement (take the branch) + __ add( O1_disp, Lbcp, Lbcp );// add to bc addr + + // continue with bytecode @ target + // %%%%% Like Intel, could speed things up by moving bytecode fetch to code above, + // %%%%% and changing dispatch_next to dispatch_only + __ dispatch_next(vtos, 0, true); +} + + +// Note Condition in argument is TemplateTable::Condition +// arg scope is within class scope + +void TemplateTable::if_0cmp(Condition cc) { + // no pointers, integer only! 
+ transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches); if_cmp is handed the negated condition ccNot(cc) + __ cmp( Otos_i, 0); + __ if_cmp(ccNot(cc), false); +} + + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + __ pop_i(O1); + __ cmp(O1, Otos_i); + __ if_cmp(ccNot(cc), false); +} + + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + __ tst(Otos_i); + __ if_cmp(ccNot(cc), true); +} + + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + __ pop_ptr(O1); + __ verify_oop(O1); + __ verify_oop(Otos_i); + __ cmp(O1, Otos_i); + __ if_cmp(ccNot(cc), true); +} + + + +void TemplateTable::ret() { + transition(vtos, vtos); + locals_index(G3_scratch); + __ access_local_returnAddress(G3_scratch, Otos_i); + // Otos_i contains the bci, compute the bcp from that + +#ifdef ASSERT + // jsr result was labeled as an 'itos' not an 'atos' because we cannot GC + // the result. The return address (really a BCI) was stored with an + // 'astore' because JVM specs claim it's a pointer-sized thing. Hence in + // the 64-bit build the 32-bit BCI is actually in the low bits of a 64-bit + // loaded value. 
+ { Label zzz ; + __ set (65536, G3_scratch) ; + __ cmp (Otos_i, G3_scratch) ; + __ bp( Assembler::lessEqualUnsigned, false, Assembler::xcc, Assembler::pn, zzz); + __ delayed()->nop(); + __ stop("BCI is in the wrong register half?"); + __ bind (zzz) ; + } +#endif + + __ profile_ret(vtos, Otos_i, G4_scratch); + + __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch); + __ add(G3_scratch, Otos_i, G3_scratch); + __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp); + __ dispatch_next(vtos, 0, true); +} + + +void TemplateTable::wide_ret() { + transition(vtos, vtos); + locals_index_wide(G3_scratch); + __ access_local_returnAddress(G3_scratch, Otos_i); + // Otos_i contains the bci, compute the bcp from that + + __ profile_ret(vtos, Otos_i, G4_scratch); + + __ ld_ptr(Lmethod, Method::const_offset(), G3_scratch); + __ add(G3_scratch, Otos_i, G3_scratch); + __ add(G3_scratch, in_bytes(ConstMethod::codes_offset()), Lbcp); + __ dispatch_next(vtos, 0, true); +} + + +void TemplateTable::tableswitch() { + transition(itos, vtos); + Label default_case, continue_execution; + + // align bcp + __ add(Lbcp, BytesPerInt, O1); + __ and3(O1, -BytesPerInt, O1); + // load lo, hi + __ ld(O1, 1 * BytesPerInt, O2); // lo (low bound) + __ ld(O1, 2 * BytesPerInt, O3); // hi (high bound) + // Sign extend the 32 bits + __ sra ( Otos_i, 0, Otos_i ); + + // check against lo & hi + __ cmp( Otos_i, O2); + __ br( Assembler::less, false, Assembler::pn, default_case); + __ delayed()->cmp( Otos_i, O3 ); + __ br( Assembler::greater, false, Assembler::pn, default_case); + // lookup dispatch offset + __ delayed()->sub(Otos_i, O2, O2); + __ profile_switch_case(O2, O3, G3_scratch, G4_scratch); + __ sll(O2, LogBytesPerInt, O2); + __ add(O2, 3 * BytesPerInt, O2); + __ ba(continue_execution); + __ delayed()->ld(O1, O2, O2); + // handle default + __ bind(default_case); + __ profile_switch_default(O3); + __ ld(O1, 0, O2); // get default offset + // continue execution + __ bind(continue_execution); + __ add(Lbcp, 
O2, Lbcp); + __ dispatch_next(vtos, 0, true); +} + + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + // align bcp + __ add(Lbcp, BytesPerInt, O1); + __ and3(O1, -BytesPerInt, O1); + // set counter + __ ld(O1, BytesPerInt, O2); + __ sll(O2, LogBytesPerInt + 1, O2); // in word-pairs + __ add(O1, 2 * BytesPerInt, O3); // set first pair addr + __ ba(loop_entry); + __ delayed()->add(O3, O2, O2); // counter now points past last pair + + // table search (O4 was loaded from the current pair in the delay slot at loop_entry) + __ bind(loop); + __ cmp(O4, Otos_i); + __ br(Assembler::equal, true, Assembler::pn, found); + __ delayed()->ld(O3, BytesPerInt, O4); // offset -> O4 + __ inc(O3, 2 * BytesPerInt); + + __ bind(loop_entry); + __ cmp(O2, O3); + __ brx(Assembler::greaterUnsigned, true, Assembler::pt, loop); + __ delayed()->ld(O3, 0, O4); + + // default case + __ ld(O1, 0, O4); // get default offset + if (ProfileInterpreter) { + __ profile_switch_default(O3); + __ ba_short(continue_execution); + } + + // entry found -> get offset + __ bind(found); + if (ProfileInterpreter) { + __ sub(O3, O1, O3); + __ sub(O3, 2*BytesPerInt, O3); + __ srl(O3, LogBytesPerInt + 1, O3); // in word-pairs + __ profile_switch_case(O3, O1, O2, G3_scratch); + + __ bind(continue_execution); + } + __ add(Lbcp, O4, Lbcp); + __ dispatch_next(vtos, 0, true); +} + + +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: (copied from Intel) + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
+ // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (nonexistent) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // register allocation + assert(Otos_i == O0, "alias checking"); + const Register Rkey = Otos_i; // already set (tosca) + const Register Rarray = O1; + const Register Ri = O2; + const Register Rj = O3; + const Register Rh = O4; + const Register Rscratch = O5; + + const int log_entry_size = 3; + const int entry_size = 1 << log_entry_size; + + Label found; + // Find Array start + __ add(Lbcp, 3 * BytesPerInt, Rarray); + __ and3(Rarray, -BytesPerInt, Rarray); + // initialize i & j (in delay slot) + __ clr( Ri ); + + // and start + Label entry; + __ ba(entry); + __ delayed()->ld( Rarray, -BytesPerInt, Rj); + // (Rj is already in the native byte-ordering.) + + // binary search loop + { Label loop; + __ bind( loop ); + // int h = (i + j) >> 1; + __ sra( Rh, 1, Rh ); + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + __ sll( Rh, log_entry_size, Rscratch ); + __ ld( Rarray, Rscratch, Rscratch ); + // (Rscratch is already in the native byte-ordering.) 
+ __ cmp( Rkey, Rscratch ); + __ movcc( Assembler::less, false, Assembler::icc, Rh, Rj ); // j = h if (key < array[h].fast_match()) + __ movcc( Assembler::greaterEqual, false, Assembler::icc, Rh, Ri ); // i = h if (key >= array[h].fast_match()) + + // while (i+1 < j) + __ bind( entry ); + __ add( Ri, 1, Rscratch ); + __ cmp(Rscratch, Rj); + __ br( Assembler::less, true, Assembler::pt, loop ); + __ delayed()->add( Ri, Rj, Rh ); // start h = i + j >> 1; + } + + // end of binary search, result index is i (must check again!) + Label default_case; + Label continue_execution; + if (ProfileInterpreter) { + __ mov( Ri, Rh ); // Save index in i for profiling + } + __ sll( Ri, log_entry_size, Ri ); + __ ld( Rarray, Ri, Rscratch ); + // (Rscratch is already in the native byte-ordering.) + __ cmp( Rkey, Rscratch ); + __ br( Assembler::notEqual, true, Assembler::pn, default_case ); + __ delayed()->ld( Rarray, -2 * BytesPerInt, Rj ); // load default offset -> j + + // entry found -> j = offset + __ inc( Ri, BytesPerInt ); + __ profile_switch_case(Rh, Rj, Rscratch, Rkey); + __ ld( Rarray, Ri, Rj ); + // (Rj is already in the native byte-ordering.) 
+ + if (ProfileInterpreter) { + __ ba_short(continue_execution); + } + + __ bind(default_case); // fall through (if not profiling) + __ profile_switch_default(Ri); + + __ bind(continue_execution); + __ add( Lbcp, Rj, Lbcp ); + __ dispatch_next(vtos, 0, true); +} + + +void TemplateTable::_return(TosState state) { + transition(state, state); + assert(_desc->calls_vm(), "inconsistent calls_vm information"); + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + __ mov(G0, G3_scratch); + __ access_local_ptr(G3_scratch, Otos_i); + __ load_klass(Otos_i, O2); + __ set(JVM_ACC_HAS_FINALIZER, G3); + __ ld(O2, in_bytes(Klass::access_flags_offset()), O2); + __ andcc(G3, O2, G0); + Label skip_register_finalizer; + __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer); + __ delayed()->nop(); + + // Call out to do finalizer registration + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), Otos_i); + + __ bind(skip_register_finalizer); + } + + if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { + Label no_safepoint; + __ ldx(Address(G2_thread, JavaThread::polling_page_offset()), G3_scratch, 0); + __ btst(SafepointMechanism::poll_bit(), G3_scratch); + __ br(Assembler::zero, false, Assembler::pt, no_safepoint); + __ delayed()->nop(); + __ push(state); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)); + __ pop(state); + __ bind(no_safepoint); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(Otos_i); + } + __ remove_activation(state, /* throw_monitor_exception */ true); + + // The caller's SP was adjusted upon method entry to accommodate + // the callee's non-argument locals. Undo that adjustment. 
+ __ ret(); // return to caller + __ delayed()->restore(I5_savedSP, G0, SP); +} + + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPU's in +// order. Store buffers on most chips allow reads & writes to reorder; the +// JMM's ReadAfterWrite.java test fails in -Xint mode without some kind of +// memory barrier (i.e., it's not sufficient that the interpreter does not +// reorder volatile references, the hardware also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized wrt to each other. +// ALSO reads & writes act as acquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that happen after +// the read float up to before the read. It's OK for non-volatile memory refs +// that happen before the volatile read to float down below it. +// (3) Similar a volatile write cannot let unrelated NON-volatile memory refs +// that happen BEFORE the write float down to after the write. It's OK for +// non-volatile memory refs that happen after the volatile write to float up +// before it. +// +// We only put in barriers around volatile refs (they are expensive), not +// _between_ memory refs (that would require us to track the flavor of the +// previous memory refs). Requirements (2) and (3) require some barriers +// before volatile stores and after volatile loads. These nearly cover +// requirement (1) but miss the volatile-store-volatile-load case. This final +// case is placed after volatile-stores although it could just as well go +// before volatile-loads. 
+void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits order_constraint) {
+  // Emits a membar for order_constraint, but only when the mask includes StoreLoad:
+  // all current sparc implementations run in TSO, needing only StoreLoad, so any other mask emits nothing.
+  if ((order_constraint & Assembler::StoreLoad) == 0) return;
+  __ membar( order_constraint );
+}
+
+// ----------------------------------------------------------------------------
+void TemplateTable::resolve_cache_and_index(int byte_no,
+                                            Register Rcache,
+                                            Register index,
+                                            size_t index_size) {
+  // Depends on cpCacheOop layout!  Loads Rcache/index and calls InterpreterRuntime::resolve_from_cache when the entry does not yet record this bytecode.
+
+  Label resolved;
+  Bytecodes::Code code = bytecode();
+  switch (code) {  // the _nofast_ variants are compared against the plain getfield/putfield bytecode
+  case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+  case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+  }
+
+  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, Lbyte_code, byte_no, 1, index_size);
+  __ cmp(Lbyte_code, code);  // have we resolved this bytecode?
+  __ br(Assembler::equal, false, Assembler::pt, resolved);
+  __ delayed()->set(code, O1);  // delay slot: O1 = bytecode, the argument handed to resolve_from_cache below
+
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  // first time invocation - must resolve first
+  __ call_VM(noreg, entry, O1);
+  // Update registers with resolved info
+  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
+  __ bind(resolved);
+}
+
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
+                                               bool is_invokevirtual,
+                                               bool is_invokevfinal,
+                                               bool is_invokedynamic) {
+  // Loads method (f1 or f2, chosen by byte_no), optionally f2 into itable_index, and the flags word. Uses both G3_scratch and G4_scratch
+  Register cache = G3_scratch;
+  Register index = G4_scratch;
+  assert_different_registers(cache, method, itable_index);
+
+  // determine constant pool cache field offsets
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+  const int method_offset = in_bytes(
+    ConstantPoolCache::base_offset() +
+      ((byte_no == f2_byte)
+       ? ConstantPoolCacheEntry::f2_offset()
+       : ConstantPoolCacheEntry::f1_offset()
+      )
+    );
+  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
+                                    ConstantPoolCacheEntry::flags_offset());
+  // access constant pool cache fields
+  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
+                                    ConstantPoolCacheEntry::f2_offset());
+
+  if (is_invokevfinal) {  // this path only reads the entry; no resolution call is emitted
+    __ get_cache_and_index_at_bcp(cache, index, 1);
+    __ ld_ptr(Address(cache, method_offset), method);
+  } else {
+    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
+    resolve_cache_and_index(byte_no, cache, index, index_size);
+    __ ld_ptr(Address(cache, method_offset), method);
+  }
+
+  if (itable_index != noreg) {
+    // pick up itable or appendix index from f2 also:
+    __ ld_ptr(Address(cache, index_offset), itable_index);
+  }
+  __ ld_ptr(Address(cache, flags_offset), flags);
+}
+
+// The Rcache register must be set before call. Loads flags -> Rflags and f2 -> Roffset; for statics also f1 -> Robj, then the mirror handle found there, resolved in place.
+void TemplateTable::load_field_cp_cache_entry(Register Robj,
+                                              Register Rcache,
+                                              Register index,
+                                              Register Roffset,
+                                              Register Rflags,
+                                              bool is_static) {
+  assert_different_registers(Rcache, Rflags, Roffset, Lscratch);
+
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+  __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
+  __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f2_offset(), Roffset);
+  if (is_static) {
+    __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f1_offset(), Robj);
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+    __ ld_ptr( Robj, mirror_offset, Robj);
+    __ resolve_oop_handle(Robj, Lscratch);  // Lscratch is the temp here, hence the assert above
+  }
+}
+
+// The registers Rcache and index expected to be set before call.
+// Correct values of the Rcache and index registers are preserved.
+void TemplateTable::jvmti_post_field_access(Register Rcache,
+                                            Register index,
+                                            bool is_static,
+                                            bool has_tos) {
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+  if (JvmtiExport::can_post_field_access()) {
+    // Check to see if a field access watch has been set before we take
+    // the time to call into the VM.
+    Label Label1;
+    assert_different_registers(Rcache, index, G1_scratch);
+    AddressLiteral get_field_access_count_addr(JvmtiExport::get_field_access_count_addr());
+    __ load_contents(get_field_access_count_addr, G1_scratch);
+    __ cmp_and_br_short(G1_scratch, 0, Assembler::equal, Assembler::pt, Label1);
+
+    __ add(Rcache, in_bytes(cp_base_offset), Rcache);  // Rcache becomes the cache entry pointer passed to the VM
+
+    if (is_static) {
+      __ clr(Otos_i);
+    } else {
+      if (has_tos) {
+      // save object pointer before call_VM() clobbers it
+        __ push_ptr(Otos_i);  // put object on tos where GC wants it.
+      } else {
+        // Load top of stack (do not pop the value off the stack);
+        __ ld_ptr(Lesp, Interpreter::expr_offset_in_bytes(0), Otos_i);
+      }
+      __ verify_oop(Otos_i);
+    }
+    // Otos_i: object pointer or NULL if static
+    // Rcache: cache entry pointer
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
+               Otos_i, Rcache);
+    if (!is_static && has_tos) {
+      __ pop_ptr(Otos_i);  // restore object pointer
+      __ verify_oop(Otos_i);
+    }
+    __ get_cache_and_index_at_bcp(Rcache, index, 1);  // re-establish Rcache/index (Rcache was advanced above)
+    __ bind(Label1);
+  }
+}
+
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {  // shared generator behind getfield / nofast_getfield / getstatic
+  transition(vtos, vtos);
+
+  Register Rcache = G3_scratch;
+  Register index  = G4_scratch;
+  Register Rclass = Rcache;
+  Register Roffset= G4_scratch;
+  Register Rflags = G1_scratch;
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+  resolve_cache_and_index(byte_no, Rcache, index, sizeof(u2));
+  jvmti_post_field_access(Rcache, index, is_static, false);
+  load_field_cp_cache_entry(Rclass, Rcache, index, Roffset, Rflags, is_static);
+
+  if (!is_static) {
+    pop_and_check_object(Rclass);  // instance field: the receiver is popped from the expression stack
+  } else {
+    __ verify_oop(Rclass);
+  }
+
+  Label exit;
+
+  Assembler::Membar_mask_bits membar_bits =
+    Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore);
+
+  if (__ membar_has_effect(membar_bits)) {
+    // Get volatile flag
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
+    __ and3(Rflags, Lscratch, Lscratch);  // Lscratch != 0 iff the is_volatile bit is set in the flags
+  }
+
+  Label checkVolatile;
+
+  // compute field type
+  Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj;
+  __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
+  // Make sure we don't need to mask Rflags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
+
+  // Check atos before itos for getstatic, more likely (in Queens at least)
+  __ cmp(Rflags, atos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notObj);
+  __ delayed() ->cmp(Rflags, itos);  // delay slot: compare for the next type test (same pattern in every case below)
+
+  // atos
+  do_oop_load(_masm, Rclass, Roffset, 0, Otos_i, noreg);
+  __ verify_oop(Otos_i);
+  __ push(atos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_agetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);  // delay slot: condition codes for the volatile test at checkVolatile
+
+  __ bind(notObj);
+
+  // cmp(Rflags, itos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notInt);
+  __ delayed() ->cmp(Rflags, ltos);
+
+  // itos
+  __ ld(Rclass, Roffset, Otos_i);
+  __ push(itos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_igetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);
+
+  __ bind(notInt);
+
+  // cmp(Rflags, ltos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notLong);
+  __ delayed() ->cmp(Rflags, btos);
+
+  // ltos
+  // load must be atomic
+  __ ld_long(Rclass, Roffset, Otos_l);
+  __ push(ltos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_lgetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);
+
+  __ bind(notLong);
+
+  // cmp(Rflags, btos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notByte);
+  __ delayed() ->cmp(Rflags, ztos);
+
+  // btos
+  __ ldsb(Rclass, Roffset, Otos_i);
+  __ push(itos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_bgetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);
+
+  __ bind(notByte);
+
+  // cmp(Rflags, ztos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notBool);
+  __ delayed() ->cmp(Rflags, ctos);
+
+  // ztos
+  __ ldsb(Rclass, Roffset, Otos_i);
+  __ push(itos);
+  if (!is_static && rc == may_rewrite) {
+    // use btos rewriting, no truncating to t/f bit is needed for getfield.
+    patch_bytecode(Bytecodes::_fast_bgetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);
+
+  __ bind(notBool);
+
+  // cmp(Rflags, ctos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notChar);
+  __ delayed() ->cmp(Rflags, stos);
+
+  // ctos
+  __ lduh(Rclass, Roffset, Otos_i);
+  __ push(itos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_cgetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);
+
+  __ bind(notChar);
+
+  // cmp(Rflags, stos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notShort);
+  __ delayed() ->cmp(Rflags, ftos);
+
+  // stos
+  __ ldsh(Rclass, Roffset, Otos_i);
+  __ push(itos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_sgetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);
+
+  __ bind(notShort);
+
+
+  // cmp(Rflags, ftos);
+  __ br(Assembler::notEqual, false, Assembler::pt, notFloat);
+  __ delayed() ->tst(Lscratch);
+
+  // ftos
+  __ ldf(FloatRegisterImpl::S, Rclass, Roffset, Ftos_f);
+  __ push(ftos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_fgetfield, G3_scratch, G4_scratch);
+  }
+  __ ba(checkVolatile);
+  __ delayed()->tst(Lscratch);
+
+  __ bind(notFloat);
+
+
+  // dtos -- NOTE(review): this path falls into checkVolatile relying on the tst(Lscratch) from the notFloat delay slot; confirm push/patch_bytecode leave the condition codes intact
+  __ ldf(FloatRegisterImpl::D, Rclass, Roffset, Ftos_d);
+  __ push(dtos);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_dgetfield, G3_scratch, G4_scratch);
+  }
+
+  __ bind(checkVolatile);
+  if (__ membar_has_effect(membar_bits)) {
+    // __ tst(Lscratch); executed in delay slot
+    __ br(Assembler::zero, false, Assembler::pt, exit);
+    __ delayed()->nop();
+    volatile_barrier(membar_bits);
+  }
+
+  __ bind(exit);
+}
+
+void TemplateTable::getfield(int byte_no) {
+  getfield_or_static(byte_no, false);  // instance field, rc left at its default
+}
+
+void TemplateTable::nofast_getfield(int byte_no) {
+  getfield_or_static(byte_no, false, may_not_rewrite);  // instance field, bytecode is never patched to a _fast_ form
+}
+
+void TemplateTable::getstatic(int byte_no) {
+  getfield_or_static(byte_no, true);  // static field (is_static suppresses every patch_bytecode call)
+}
+
+void TemplateTable::fast_accessfield(TosState state) {  // rewritten _fast_xgetfield load: receiver arrives in Otos_i, result is left in tos for 'state'
+  transition(atos, state);
+  Register Rcache  = G3_scratch;
+  Register index   = G4_scratch;
+  Register Roffset = G4_scratch;
+  Register Rflags  = Rcache;
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+  __ get_cache_and_index_at_bcp(Rcache, index, 1);
+  jvmti_post_field_access(Rcache, index, /*is_static*/false, /*has_tos*/true);
+
+  __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f2_offset(), Roffset);
+
+  __ null_check(Otos_i);
+  __ verify_oop(Otos_i);
+
+  Label exit;
+
+  Assembler::Membar_mask_bits membar_bits =
+    Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore);
+  if (__ membar_has_effect(membar_bits)) {
+    // Get volatile flag from the entry's flags word (not f2, which is the field offset). Rflags aliases Rcache; the cache pointer is not needed after this load.
+    __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
+  }
+
+  switch (bytecode()) {
+    case Bytecodes::_fast_bgetfield:
+      __ ldsb(Otos_i, Roffset, Otos_i);
+      break;
+    case Bytecodes::_fast_cgetfield:
+      __ lduh(Otos_i, Roffset, Otos_i);
+      break;
+    case Bytecodes::_fast_sgetfield:
+      __ ldsh(Otos_i, Roffset, Otos_i);
+      break;
+    case Bytecodes::_fast_igetfield:
+      __ ld(Otos_i, Roffset, Otos_i);
+      break;
+    case Bytecodes::_fast_lgetfield:
+      __ ld_long(Otos_i, Roffset, Otos_l);
+      break;
+    case Bytecodes::_fast_fgetfield:
+      __ ldf(FloatRegisterImpl::S, Otos_i, Roffset, Ftos_f);
+      break;
+    case Bytecodes::_fast_dgetfield:
+      __ ldf(FloatRegisterImpl::D, Otos_i, Roffset, Ftos_d);
+      break;
+    case Bytecodes::_fast_agetfield:
+      do_oop_load(_masm, Otos_i, Roffset, 0, Otos_i, noreg);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  if (__ membar_has_effect(membar_bits)) {
+    __ btst(Lscratch, Rflags);  // is_volatile bit set in the flags word?
+    __ br(Assembler::zero, false, Assembler::pt, exit);
+    __ delayed()->nop();
+    volatile_barrier(membar_bits);
+    __ bind(exit);
+  }
+
+  if (state == atos) {
+    __ verify_oop(Otos_i);    // does not blow flags!
+  }
+}
+
+void TemplateTable::jvmti_post_fast_field_mod() {
+  if (JvmtiExport::can_post_field_modification()) {
+    // Check to see if a field modification watch has been set before we take
+    // the time to call into the VM.
+    Label done;
+    AddressLiteral get_field_modification_count_addr(JvmtiExport::get_field_modification_count_addr());
+    __ load_contents(get_field_modification_count_addr, G4_scratch);
+    __ cmp_and_br_short(G4_scratch, 0, Assembler::equal, Assembler::pt, done);
+    __ pop_ptr(G4_scratch);     // copy the object pointer from tos
+    __ verify_oop(G4_scratch);
+    __ push_ptr(G4_scratch);    // put the object pointer back on tos
+    __ get_cache_entry_pointer_at_bcp(G1_scratch, G3_scratch, 1);
+    // Save tos values before call_VM() clobbers them. Since we have
+    // to do it for every data type, we use the saved values as the
+    // jvalue object.
+    switch (bytecode()) {  // save tos values before call_VM() clobbers them
+    case Bytecodes::_fast_aputfield: __ push_ptr(Otos_i); break;
+    case Bytecodes::_fast_bputfield: // fall through
+    case Bytecodes::_fast_zputfield: // fall through
+    case Bytecodes::_fast_sputfield: // fall through
+    case Bytecodes::_fast_cputfield: // fall through
+    case Bytecodes::_fast_iputfield: __ push_i(Otos_i); break;
+    case Bytecodes::_fast_dputfield: __ push_d(Ftos_d); break;
+    case Bytecodes::_fast_fputfield: __ push_f(Ftos_f); break;
+    // get words in right order for use as jvalue object
+    case Bytecodes::_fast_lputfield: __ push_l(Otos_l); break;
+    }
+    // setup pointer to jvalue object
+    __ mov(Lesp, G3_scratch);  __ inc(G3_scratch, wordSize);
+    // G4_scratch: object pointer
+    // G1_scratch: cache entry pointer
+    // G3_scratch: jvalue object on the stack
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), G4_scratch, G1_scratch, G3_scratch);
+    switch (bytecode()) {             // restore tos values
+    case Bytecodes::_fast_aputfield: __ pop_ptr(Otos_i); break;
+    case Bytecodes::_fast_bputfield: // fall through
+    case Bytecodes::_fast_zputfield: // fall through
+    case Bytecodes::_fast_sputfield: // fall through
+    case Bytecodes::_fast_cputfield: // fall through
+    case Bytecodes::_fast_iputfield: __ pop_i(Otos_i); break;
+    case Bytecodes::_fast_dputfield: __ pop_d(Ftos_d); break;
+    case Bytecodes::_fast_fputfield: __ pop_f(Ftos_f); break;
+    case Bytecodes::_fast_lputfield: __ pop_l(Otos_l); break;
+    }
+    __ bind(done);
+  }
+}
+
+// The registers Rcache and index expected to be set before call.
+// The function may destroy various registers, just not the Rcache and index registers.
+void TemplateTable::jvmti_post_field_mod(Register Rcache, Register index, bool is_static) { + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before we take + // the time to call into the VM. + Label Label1; + assert_different_registers(Rcache, index, G1_scratch); + AddressLiteral get_field_modification_count_addr(JvmtiExport::get_field_modification_count_addr()); + __ load_contents(get_field_modification_count_addr, G1_scratch); + __ cmp_and_br_short(G1_scratch, 0, Assembler::zero, Assembler::pt, Label1); + + // The Rcache and index registers have been already set. + // This allows to eliminate this call but the Rcache and index + // registers must be correspondingly used after this line. + __ get_cache_and_index_at_bcp(G1_scratch, G4_scratch, 1); + + __ add(G1_scratch, in_bytes(cp_base_offset), G3_scratch); + if (is_static) { + // Life is simple. Null out the object pointer. + __ clr(G4_scratch); + } else { + Register Rflags = G1_scratch; + // Life is harder. The stack holds the value on top, followed by the + // object. We don't know the size of the value, though; it could be + // one or two words depending on its type. As a result, we must find + // the type to determine where the object is. 
+ + Label two_word, valsizeknown; + __ ld_ptr(G1_scratch, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags); + __ mov(Lesp, G4_scratch); + __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags); + // Make sure we don't need to mask Rflags after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ cmp(Rflags, ltos); + __ br(Assembler::equal, false, Assembler::pt, two_word); + __ delayed()->cmp(Rflags, dtos); + __ br(Assembler::equal, false, Assembler::pt, two_word); + __ delayed()->nop(); + __ inc(G4_scratch, Interpreter::expr_offset_in_bytes(1)); + __ ba_short(valsizeknown); + __ bind(two_word); + + __ inc(G4_scratch, Interpreter::expr_offset_in_bytes(2)); + + __ bind(valsizeknown); + // setup object pointer + __ ld_ptr(G4_scratch, 0, G4_scratch); + __ verify_oop(G4_scratch); + } + // setup pointer to jvalue object + __ mov(Lesp, G1_scratch); __ inc(G1_scratch, wordSize); + // G4_scratch: object pointer or NULL if static + // G3_scratch: cache entry pointer + // G1_scratch: jvalue object on the stack + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), + G4_scratch, G3_scratch, G1_scratch); + __ get_cache_and_index_at_bcp(Rcache, index, 1); + __ bind(Label1); + } +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. 
+ __ verify_oop(r); +} + +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + Register Rcache = G3_scratch; + Register index = G4_scratch; + Register Rclass = Rcache; + Register Roffset= G4_scratch; + Register Rflags = G1_scratch; + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + resolve_cache_and_index(byte_no, Rcache, index, sizeof(u2)); + jvmti_post_field_mod(Rcache, index, is_static); + load_field_cp_cache_entry(Rclass, Rcache, index, Roffset, Rflags, is_static); + + Assembler::Membar_mask_bits read_bits = + Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::StoreStore); + Assembler::Membar_mask_bits write_bits = Assembler::StoreLoad; + + Label notVolatile, checkVolatile, exit; + if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) { + __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch); + __ and3(Rflags, Lscratch, Lscratch); + + if (__ membar_has_effect(read_bits)) { + __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile); + volatile_barrier(read_bits); + __ bind(notVolatile); + } + } + + __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags); + // Make sure we don't need to mask Rflags after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + + // compute field type + Label notInt, notShort, notChar, notObj, notByte, notBool, notLong, notFloat; + + if (is_static) { + // putstatic with object type most likely, check that first + __ cmp(Rflags, atos); + __ br(Assembler::notEqual, false, Assembler::pt, notObj); + __ delayed()->cmp(Rflags, itos); + + // atos + { + __ pop_ptr(); + __ verify_oop(Otos_i); + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch); + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notObj); + // cmp(Rflags, itos); + __ br(Assembler::notEqual, false, Assembler::pt, notInt); + __ delayed()->cmp(Rflags, btos); + + // itos + { + __ 
pop_i(); + __ st(Otos_i, Rclass, Roffset); + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notInt); + } else { + // putfield with int type most likely, check that first + __ cmp(Rflags, itos); + __ br(Assembler::notEqual, false, Assembler::pt, notInt); + __ delayed()->cmp(Rflags, atos); + + // itos + { + __ pop_i(); + pop_and_check_object(Rclass); + __ st(Otos_i, Rclass, Roffset); + if (rc == may_rewrite) patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch, true, byte_no); + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notInt); + // cmp(Rflags, atos); + __ br(Assembler::notEqual, false, Assembler::pt, notObj); + __ delayed()->cmp(Rflags, btos); + + // atos + { + __ pop_ptr(); + pop_and_check_object(Rclass); + __ verify_oop(Otos_i); + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch); + if (rc == may_rewrite) patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch, true, byte_no); + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notObj); + } + + // cmp(Rflags, btos); + __ br(Assembler::notEqual, false, Assembler::pt, notByte); + __ delayed()->cmp(Rflags, ztos); + + // btos + { + __ pop_i(); + if (!is_static) pop_and_check_object(Rclass); + __ stb(Otos_i, Rclass, Roffset); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, G3_scratch, G4_scratch, true, byte_no); + } + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notByte); + + // cmp(Rflags, btos); + __ br(Assembler::notEqual, false, Assembler::pt, notBool); + __ delayed()->cmp(Rflags, ltos); + + // ztos + { + __ pop_i(); + if (!is_static) pop_and_check_object(Rclass); + __ and3(Otos_i, 1, Otos_i); + __ stb(Otos_i, Rclass, Roffset); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, G3_scratch, G4_scratch, true, byte_no); + } + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notBool); + // cmp(Rflags, 
ltos); + __ br(Assembler::notEqual, false, Assembler::pt, notLong); + __ delayed()->cmp(Rflags, ctos); + + // ltos + { + __ pop_l(); + if (!is_static) pop_and_check_object(Rclass); + __ st_long(Otos_l, Rclass, Roffset); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, G3_scratch, G4_scratch, true, byte_no); + } + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notLong); + // cmp(Rflags, ctos); + __ br(Assembler::notEqual, false, Assembler::pt, notChar); + __ delayed()->cmp(Rflags, stos); + + // ctos (char) + { + __ pop_i(); + if (!is_static) pop_and_check_object(Rclass); + __ sth(Otos_i, Rclass, Roffset); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, G3_scratch, G4_scratch, true, byte_no); + } + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notChar); + // cmp(Rflags, stos); + __ br(Assembler::notEqual, false, Assembler::pt, notShort); + __ delayed()->cmp(Rflags, ftos); + + // stos (short) + { + __ pop_i(); + if (!is_static) pop_and_check_object(Rclass); + __ sth(Otos_i, Rclass, Roffset); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, G3_scratch, G4_scratch, true, byte_no); + } + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notShort); + // cmp(Rflags, ftos); + __ br(Assembler::notZero, false, Assembler::pt, notFloat); + __ delayed()->nop(); + + // ftos + { + __ pop_f(); + if (!is_static) pop_and_check_object(Rclass); + __ stf(FloatRegisterImpl::S, Ftos_f, Rclass, Roffset); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, G3_scratch, G4_scratch, true, byte_no); + } + __ ba(checkVolatile); + __ delayed()->tst(Lscratch); + } + + __ bind(notFloat); + + // dtos + { + __ pop_d(); + if (!is_static) pop_and_check_object(Rclass); + __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset); + if (!is_static && rc == may_rewrite) { + 
patch_bytecode(Bytecodes::_fast_dputfield, G3_scratch, G4_scratch, true, byte_no); + } + } + + __ bind(checkVolatile); + __ tst(Lscratch); + + if (__ membar_has_effect(write_bits)) { + // __ tst(Lscratch); in delay slot + __ br(Assembler::zero, false, Assembler::pt, exit); + __ delayed()->nop(); + volatile_barrier(Assembler::StoreLoad); + __ bind(exit); + } +} + +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + Register Rcache = G3_scratch; + Register Rclass = Rcache; + Register Roffset= G4_scratch; + Register Rflags = G1_scratch; + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + __ get_cache_and_index_at_bcp(Rcache, G4_scratch, 1); + + Assembler::Membar_mask_bits read_bits = + Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::StoreStore); + Assembler::Membar_mask_bits write_bits = Assembler::StoreLoad; + + Label notVolatile, checkVolatile, exit; + if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) { + __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags); + __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch); + __ and3(Rflags, Lscratch, Lscratch); + if (__ membar_has_effect(read_bits)) { + __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile); + volatile_barrier(read_bits); + __ bind(notVolatile); + } + } + + __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f2_offset(), Roffset); + pop_and_check_object(Rclass); + + switch (bytecode()) { + case Bytecodes::_fast_zputfield: __ and3(Otos_i, 1, Otos_i); // fall through to bputfield + case Bytecodes::_fast_bputfield: __ stb(Otos_i, Rclass, Roffset); break; + case Bytecodes::_fast_cputfield: /* fall through */ + case Bytecodes::_fast_sputfield: __ sth(Otos_i, Rclass, Roffset); break; + case Bytecodes::_fast_iputfield: __ st(Otos_i, Rclass, Roffset); break; + case Bytecodes::_fast_lputfield: __ st_long(Otos_l, Rclass, Roffset); 
break; + case Bytecodes::_fast_fputfield: + __ stf(FloatRegisterImpl::S, Ftos_f, Rclass, Roffset); + break; + case Bytecodes::_fast_dputfield: + __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset); + break; + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch); + break; + default: + ShouldNotReachHere(); + } + + if (__ membar_has_effect(write_bits)) { + __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, exit); + volatile_barrier(Assembler::StoreLoad); + __ bind(exit); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + Register Rcache = G3_scratch; + Register Roffset = G4_scratch; + Register Rflags = G4_scratch; + Register Rreceiver = Lscratch; + + __ ld_ptr(Llocals, 0, Rreceiver); + + // access constant pool cache (is resolved) + __ get_cache_and_index_at_bcp(Rcache, G4_scratch, 2); + __ ld_ptr(Rcache, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset(), Roffset); + __ add(Lbcp, 1, Lbcp); // needed to report exception at the correct bcp + + __ verify_oop(Rreceiver); + __ null_check(Rreceiver); + if (state == atos) { + do_oop_load(_masm, Rreceiver, Roffset, 0, Otos_i, noreg); + } else if (state == itos) { + __ ld (Rreceiver, Roffset, Otos_i) ; + } else if (state == ftos) { + __ ldf(FloatRegisterImpl::S, Rreceiver, Roffset, Ftos_f); + } else { + ShouldNotReachHere(); + } + + Assembler::Membar_mask_bits membar_bits = + Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore); + if (__ membar_has_effect(membar_bits)) { + + // Get is_volatile value in Rflags and check if membar is needed + __ ld_ptr(Rcache, ConstantPoolCache::base_offset() + 
ConstantPoolCacheEntry::flags_offset(), Rflags); + + // Test volatile + Label notVolatile; + __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch); + __ btst(Rflags, Lscratch); + __ br(Assembler::zero, false, Assembler::pt, notVolatile); + __ delayed()->nop(); + volatile_barrier(membar_bits); + __ bind(notVolatile); + } + + __ interp_verify_oop(Otos_i, state, __FILE__, __LINE__); + __ sub(Lbcp, 1, Lbcp); +} + +//---------------------------------------------------------------------------------------------------- +// Calls + +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register ra, // return address + Register index, // itable index, MethodType, etc. + Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); + assert(recv == noreg || recv == O0, ""); + assert(flags == noreg || flags == O1, ""); + + // setup registers & access constant pool cache + if (recv == noreg) recv = O0; + if (flags == noreg) flags = O1; + const Register temp = O2; + assert_different_registers(method, ra, index, recv, flags, temp); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore + + // maybe push appendix to arguments + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ set((1 << 
ConstantPoolCacheEntry::has_appendix_shift), temp); + __ btst(flags, temp); + __ br(Assembler::zero, false, Assembler::pt, L_no_push); + __ delayed()->nop(); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + __ load_resolved_reference_at_index(temp, index, /*tmp*/recv); + __ verify_oop(temp); + __ push_ptr(temp); // push appendix (MethodType, CallSite, etc.) + __ bind(L_no_push); + } + + // load receiver if needed (after appendix is pushed so parameter size is correct) + if (load_receiver) { + __ and3(flags, ConstantPoolCacheEntry::parameter_size_mask, temp); // get parameter size + __ load_receiver(temp, recv); // __ argument_address uses Gargs but we need Lesp + __ verify_oop(recv); + } + + // compute return type + __ srl(flags, ConstantPoolCacheEntry::tos_state_shift, ra); + // Make sure we don't need to mask flags after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // load return address + { + const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); + AddressLiteral table(table_addr); + __ set(table, temp); + __ sll(ra, LogBytesPerWord, ra); + __ ld_ptr(Address(temp, ra), ra); + } +} + + +void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) { + Register Rtemp = G4_scratch; + Register Rcall = Rindex; + assert_different_registers(Rcall, G5_method, Gargs, Rret); + + // get target Method* & entry point + __ lookup_virtual_method(Rrecv, Rindex, G5_method); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); + __ call_from_interpreter(Rcall, Gargs, Rret); +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + + Register Rscratch = G3_scratch; + Register Rtemp = G4_scratch; + Register Rret = Lscratch; + Register O0_recv = O0; + Label notFinal; + + load_invoke_cp_cache_entry(byte_no, G5_method, noreg, 
Rret, true, false, false); + __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore + + // Check for vfinal + __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), G4_scratch); + __ btst(Rret, G4_scratch); + __ br(Assembler::zero, false, Assembler::pt, notFinal); + __ delayed()->and3(Rret, 0xFF, G4_scratch); // gets number of parameters + + if (RewriteBytecodes && !UseSharedSpaces && !DumpSharedSpaces) { + patch_bytecode(Bytecodes::_fast_invokevfinal, Rscratch, Rtemp); + } + + invokevfinal_helper(Rscratch, Rret); + + __ bind(notFinal); + + __ mov(G5_method, Rscratch); // better scratch register + __ load_receiver(G4_scratch, O0_recv); // gets receiverOop + // receiver is in O0_recv + __ verify_oop(O0_recv); + + // get return address + AddressLiteral table(Interpreter::invoke_return_entry_table()); + __ set(table, Rtemp); + __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret); // get return type + // Make sure we don't need to mask Rret after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ sll(Rret, LogBytesPerWord, Rret); + __ ld_ptr(Rtemp, Rret, Rret); // get return address + + // get receiver klass + __ null_check(O0_recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(O0_recv, O0_recv); + __ verify_klass_ptr(O0_recv); + + __ profile_virtual_call(O0_recv, O4); + + generate_vtable_call(O0_recv, Rscratch, Rret); +} + +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + + load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Lscratch, true, + /*is_invokevfinal*/true, false); + __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore + invokevfinal_helper(G3_scratch, Lscratch); +} + +void TemplateTable::invokevfinal_helper(Register Rscratch, Register Rret) { + Register Rtemp = G4_scratch; + + // Load receiver from stack slot + __ ld_ptr(G5_method, in_bytes(Method::const_offset()), G4_scratch); + __ lduh(G4_scratch, 
in_bytes(ConstMethod::size_of_parameters_offset()), G4_scratch); + __ load_receiver(G4_scratch, O0); + + // receiver NULL check + __ null_check(O0); + + __ profile_final_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, true); + + // get return address + AddressLiteral table(Interpreter::invoke_return_entry_table()); + __ set(table, Rtemp); + __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret); // get return type + // Make sure we don't need to mask Rret after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ sll(Rret, LogBytesPerWord, Rret); + __ ld_ptr(Rtemp, Rret, Rret); // get return address + + + // do the call + __ call_from_interpreter(Rscratch, Gargs, Rret); +} + + +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + const Register Rret = Lscratch; + const Register O0_recv = O0; + const Register Rscratch = G3_scratch; + + prepare_invoke(byte_no, G5_method, Rret, noreg, O0_recv); // get receiver also for null check + __ null_check(O0_recv); + + // do the call + __ profile_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); + __ call_from_interpreter(Rscratch, Gargs, Rret); +} + + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + const Register Rret = Lscratch; + const Register Rscratch = G3_scratch; + + prepare_invoke(byte_no, G5_method, Rret); // get f1 Method* + + // do the call + __ profile_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); + __ call_from_interpreter(Rscratch, Gargs, Rret); +} + +void TemplateTable::invokeinterface_object_method(Register RKlass, + Register Rcall, + Register Rret, + Register Rflags) { + Register Rscratch = G4_scratch; + Register Rindex = Lscratch; + + assert_different_registers(Rscratch, Rindex, Rret); + + Label notFinal; + + // Check for vfinal + __ set((1 << 
ConstantPoolCacheEntry::is_vfinal_shift), Rscratch); + __ btst(Rflags, Rscratch); + __ br(Assembler::zero, false, Assembler::pt, notFinal); + __ delayed()->nop(); + + __ profile_final_call(O4); + + // do the call - the index (f2) contains the Method* + assert_different_registers(G5_method, Gargs, Rcall); + __ mov(Rindex, G5_method); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); + __ call_from_interpreter(Rcall, Gargs, Rret); + __ bind(notFinal); + + __ profile_virtual_call(RKlass, O4); + generate_vtable_call(RKlass, Rindex, Rret); +} + + +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + const Register Rinterface = G1_scratch; + const Register Rmethod = Lscratch; + const Register Rret = G3_scratch; + const Register O0_recv = O0; + const Register O1_flags = O1; + const Register O2_Klass = O2; + const Register Rscratch = G4_scratch; + assert_different_registers(Rscratch, G5_method); + + prepare_invoke(byte_no, Rinterface, Rret, Rmethod, O0_recv, O1_flags); + + // First check for Object case, then private interface method, + // then regular interface method. + + // get receiver klass - this is also a null check + __ null_check(O0_recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(O0_recv, O2_Klass); + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCache.cpp for details. 
+ Label notObjectMethod; + __ set((1 << ConstantPoolCacheEntry::is_forced_virtual_shift), Rscratch); + __ btst(O1_flags, Rscratch); + __ br(Assembler::zero, false, Assembler::pt, notObjectMethod); + __ delayed()->nop(); + + invokeinterface_object_method(O2_Klass, Rinterface, Rret, O1_flags); + + __ bind(notObjectMethod); + + Label L_no_such_interface; + + // Check for private method invocation - indicated by vfinal + Label notVFinal; + { + __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), Rscratch); + __ btst(O1_flags, Rscratch); + __ br(Assembler::zero, false, Assembler::pt, notVFinal); + __ delayed()->nop(); + + Label subtype; + Register Rtemp = O1_flags; + __ check_klass_subtype(O2_Klass, Rinterface, Rscratch, Rtemp, subtype); + // If we get here the typecheck failed + __ ba(L_no_such_interface); + __ delayed()->nop(); + __ bind(subtype); + + // do the call + Register Rcall = Rinterface; + __ mov(Rmethod, G5_method); + assert_different_registers(Rcall, G5_method, Gargs, Rret); + + __ profile_arguments_type(G5_method, Rcall, Gargs, true); + __ profile_final_call(Rscratch); + __ call_from_interpreter(Rcall, Gargs, Rret); + } + __ bind(notVFinal); + + Register Rtemp = O1_flags; + + // Receiver subtype check against REFC. + __ lookup_interface_method(// inputs: rec. 
class, interface, itable index + O2_Klass, Rinterface, noreg, + // outputs: temp reg1, temp reg2, temp reg3 + G5_method, Rscratch, Rtemp, + L_no_such_interface, + /*return_method=*/false); + + __ profile_virtual_call(O2_Klass, O4); + + // + // find entry point to call + // + + // Get declaring interface class from method + __ ld_ptr(Rmethod, Method::const_offset(), Rinterface); + __ ld_ptr(Rinterface, ConstMethod::constants_offset(), Rinterface); + __ ld_ptr(Rinterface, ConstantPool::pool_holder_offset_in_bytes(), Rinterface); + + // Get itable index from method + const Register Rindex = G5_method; + __ ld(Rmethod, Method::itable_index_offset(), Rindex); + __ sub(Rindex, Method::itable_index_max, Rindex); + __ neg(Rindex); + + // Preserve O2_Klass for throw_AbstractMethodErrorVerbose + __ mov(O2_Klass, O4); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + O4, Rinterface, Rindex, + // outputs: method, scan temp reg, temp reg + G5_method, Rscratch, Rtemp, + L_no_such_interface); + + // Check for abstract method error. + { + Label ok; + __ br_notnull_short(G5_method, Assembler::pt, ok); + // Pass arguments for generating a verbose error message. + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), + O2_Klass, Rmethod); + __ should_not_reach_here(); + __ bind(ok); + } + + Register Rcall = Rinterface; + assert_different_registers(Rcall, G5_method, Gargs, Rret); + + __ profile_arguments_type(G5_method, Rcall, Gargs, true); + __ call_from_interpreter(Rcall, Gargs, Rret); + + __ bind(L_no_such_interface); + // Pass arguments for generating a verbose error message. 
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), + O2_Klass, Rinterface); + __ should_not_reach_here(); +} + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + const Register Rret = Lscratch; + const Register G4_mtype = G4_scratch; + const Register O0_recv = O0; + const Register Rscratch = G3_scratch; + + prepare_invoke(byte_no, G5_method, Rret, G4_mtype, O0_recv); + __ null_check(O0_recv); + + // G4: MethodType object (from cpool->resolved_references[f1], if necessary) + // G5: MH.invokeExact_MT method (from f2) + + // Note: G4_mtype is already pushed (if necessary) by prepare_invoke + + // do the call + __ verify_oop(G4_mtype); + __ profile_final_call(O4); // FIXME: profile the LambdaForm also + __ profile_arguments_type(G5_method, Rscratch, Gargs, true); + __ call_from_interpreter(Rscratch, Gargs, Rret); +} + + +void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + const Register Rret = Lscratch; + const Register G4_callsite = G4_scratch; + const Register Rscratch = G3_scratch; + + prepare_invoke(byte_no, G5_method, Rret, G4_callsite); + + // G4: CallSite object (from cpool->resolved_references[f1]) + // G5: MH.linkToCallSite method (from f2) + + // Note: G4_callsite is already pushed by prepare_invoke + + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(O4); + + // do the call + __ verify_oop(G4_callsite); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); + __ call_from_interpreter(Rscratch, Gargs, Rret); +} + + +//---------------------------------------------------------------------------------------------------- +// Allocation + +void TemplateTable::_new() { + transition(vtos, atos); + + Label slow_case; + Label done; + Label initialize_header; + Label 
initialize_object; // including clearing the fields + + Register RallocatedObject = Otos_i; + Register RinstanceKlass = O1; + Register Roffset = O3; + Register Rscratch = O4; + + __ get_2_byte_integer_at_bcp(1, Rscratch, Roffset, InterpreterMacroAssembler::Unsigned); + __ get_cpool_and_tags(Rscratch, G3_scratch); + // make sure the class we're about to instantiate has been resolved + // This is done before loading InstanceKlass to be consistent with the order + // how Constant Pool is updated (see ConstantPool::klass_at_put) + __ add(G3_scratch, Array<u1>::base_offset_in_bytes(), G3_scratch); // step over the tags array header to the tag bytes + __ ldub(G3_scratch, Roffset, G3_scratch); + __ cmp(G3_scratch, JVM_CONSTANT_Class); + __ br(Assembler::notEqual, false, Assembler::pn, slow_case); + __ delayed()->sll(Roffset, LogBytesPerWord, Roffset); + // get InstanceKlass + __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass); + + // make sure klass is fully initialized: + __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch); + __ cmp(G3_scratch, InstanceKlass::fully_initialized); + __ br(Assembler::notEqual, false, Assembler::pn, slow_case); + __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); + + // get instance_size in InstanceKlass (already aligned) + //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); + + // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class + __ btst(Klass::_lh_instance_slow_path_bit, Roffset); + __ br(Assembler::notZero, false, Assembler::pn, slow_case); + __ delayed()->nop(); + + // Allocate the instance: + // If TLAB is enabled: + // Try to allocate in the TLAB. + // If fails, go to the slow path. + // Else If inline contiguous allocations are enabled: + // Try to allocate in eden. + // If fails due to heap end, go to slow path. + // + // If TLAB is enabled OR inline contiguous is enabled: + // Initialize the allocation. + // Exit. + // + // Go to slow path. 
+ + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc(); + + if(UseTLAB) { + Register RoldTopValue = RallocatedObject; + Register RtlabWasteLimitValue = G3_scratch; + Register RnewTopValue = G1_scratch; + Register RendValue = Rscratch; + Register RfreeValue = RnewTopValue; + + // check if we can allocate in the TLAB + __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RallocatedObject + __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue); + __ add(RoldTopValue, Roffset, RnewTopValue); + + // if there is enough space, we do not CAS and do not clear + __ cmp(RnewTopValue, RendValue); + if(ZeroTLAB) { + // the fields have already been cleared + __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header); + } else { + // initialize both the header and fields + __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object); + } + __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset())); // annulled branch: the new TLAB top is stored only when the branch is taken (allocation fits) + + // Allocation does not fit in the TLAB. + __ ba_short(slow_case); + } else { + // Allocation in the shared Eden + if (allow_shared_alloc) { + Register RoldTopValue = G1_scratch; + Register RtopAddr = G3_scratch; + Register RnewTopValue = RallocatedObject; + Register RendValue = Rscratch; + + __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr); + + Label retry; + __ bind(retry); + __ set((intptr_t)Universe::heap()->end_addr(), RendValue); + __ ld_ptr(RendValue, 0, RendValue); + __ ld_ptr(RtopAddr, 0, RoldTopValue); + __ add(RoldTopValue, Roffset, RnewTopValue); + + // RnewTopValue contains the top address after the new object + // has been allocated. 
+ __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case); + + __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue); + + // if someone beat us on the allocation, try again, otherwise continue + __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry); + + // bump total bytes allocated by this thread + // RoldTopValue and RtopAddr are dead, so can use G1 and G3 + __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch); + } + } + + // If UseTLAB or allow_shared_alloc are true, the object is created above and + // there is an initialize need. Otherwise, skip and go to the slow path. + if (UseTLAB || allow_shared_alloc) { + // clear object fields + __ bind(initialize_object); + __ deccc(Roffset, sizeof(oopDesc)); + __ br(Assembler::zero, false, Assembler::pt, initialize_header); + __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch); + + // initialize remaining object fields + if (UseBlockZeroing) { + // Use BIS for zeroing + __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header); + } else { + Label loop; + __ subcc(Roffset, wordSize, Roffset); + __ bind(loop); + //__ subcc(Roffset, wordSize, Roffset); // executed above loop or in delay slot + __ st_ptr(G0, G3_scratch, Roffset); + __ br(Assembler::notEqual, false, Assembler::pt, loop); + __ delayed()->subcc(Roffset, wordSize, Roffset); + } + __ ba_short(initialize_header); + } + + // slow case + __ bind(slow_case); + __ get_2_byte_integer_at_bcp(1, G3_scratch, O2, InterpreterMacroAssembler::Unsigned); + __ get_constant_pool(O1); + + call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), O1, O2); + + __ ba_short(done); + + // Initialize the header: mark, klass + __ bind(initialize_header); + + if (UseBiasedLocking) { + __ ld_ptr(RinstanceKlass, in_bytes(Klass::prototype_header_offset()), G4_scratch); + } else { + __ set((intptr_t)markWord::prototype().value(), G4_scratch); + } + __ st_ptr(G4_scratch, 
RallocatedObject, oopDesc::mark_offset_in_bytes()); // mark + __ store_klass_gap(G0, RallocatedObject); // klass gap if compressed + __ store_klass(RinstanceKlass, RallocatedObject); // klass (last for cms) + + { + SkipIfEqual skip_if( + _masm, G4_scratch, &DTraceAllocProbes, Assembler::zero); + // Trigger dtrace event + __ push(atos); + __ call_VM_leaf(noreg, + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), O0); + __ pop(atos); + } + + // continue + __ bind(done); +} + + +// newarray: loads the one-byte operand at bcp+1 into O1 and calls InterpreterRuntime::newarray with it and the value in Otos_i. +void TemplateTable::newarray() { + transition(itos, atos); + __ ldub(Lbcp, 1, O1); + call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), O1, Otos_i); +} + +// anewarray: passes the constant pool (O1), the unsigned 2-byte operand at bcp+1 (O2) and the value in Otos_i to InterpreterRuntime::anewarray. +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_constant_pool(O1); + __ get_2_byte_integer_at_bcp(1, G4_scratch, O2, InterpreterMacroAssembler::Unsigned); + call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), O1, O2, Otos_i); +} + +// arraylength: replaces the array reference in Otos_i with its length field; a zero (null) reference is routed to the NullPointerException entry. +void TemplateTable::arraylength() { + transition(atos, itos); + Label ok; + __ verify_oop(Otos_i); + __ tst(Otos_i); + __ throw_if_not_1_x( Assembler::notZero, ok ); + __ delayed()->ld(Otos_i, arrayOopDesc::length_offset_in_bytes(), Otos_i); + __ throw_if_not_2( Interpreter::_throw_NullPointerException_entry, G3_scratch, ok); +} + +// checkcast: keeps the reference in Otos_i; null skips the type check, otherwise a failed subtype check goes to the ClassCastException entry. +void TemplateTable::checkcast() { + transition(atos, atos); + Label done, is_null, quicked, cast_ok, resolved; + Register Roffset = G1_scratch; + Register RobjKlass = O5; + Register RspecifiedKlass = O4; + + // Check for casting a NULL + __ br_null(Otos_i, false, Assembler::pn, is_null); + __ delayed()->nop(); + + // Get value klass in RobjKlass + __ load_klass(Otos_i, RobjKlass); // get value klass + + // Get constant pool tag + __ get_2_byte_integer_at_bcp(1, Lscratch, Roffset, InterpreterMacroAssembler::Unsigned); + + // See if the checkcast has been quickened + __ get_cpool_and_tags(Lscratch, G3_scratch); + __ add(G3_scratch, Array<u1>::base_offset_in_bytes(), G3_scratch); + __ ldub(G3_scratch, 
Roffset, G3_scratch); + __ cmp(G3_scratch, JVM_CONSTANT_Class); + __ br(Assembler::equal, true, Assembler::pt, quicked); + __ delayed()->sll(Roffset, LogBytesPerWord, Roffset); + + __ push_ptr(); // save receiver for result, and for GC + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) ); + __ get_vm_result_2(RspecifiedKlass); + __ pop_ptr(Otos_i, G3_scratch); // restore receiver + + __ ba_short(resolved); + + // Extract target class from constant pool + __ bind(quicked); + __ load_resolved_klass_at_offset(Lscratch, Roffset, RspecifiedKlass); + + + __ bind(resolved); + __ load_klass(Otos_i, RobjKlass); // get value klass + + // Generate a fast subtype check. Branch to cast_ok if no + // failure. Throw exception if failure. + __ gen_subtype_check( RobjKlass, RspecifiedKlass, G3_scratch, G4_scratch, G1_scratch, cast_ok ); + + // Not a subtype; so must throw exception + __ throw_if_not_x( Assembler::never, Interpreter::_throw_ClassCastException_entry, G3_scratch ); + + __ bind(cast_ok); + + if (ProfileInterpreter) { + __ ba_short(done); + } + __ bind(is_null); + __ profile_null_seen(G3_scratch); + __ bind(done); +} + +// instanceof: replaces the reference in Otos_i with 1 if its klass passes the subtype check against the resolved class, else 0 (null yields 0). +void TemplateTable::instanceof() { + Label done, is_null, quicked, resolved; + transition(atos, itos); + Register Roffset = G1_scratch; + Register RobjKlass = O5; + Register RspecifiedKlass = O4; + + // Check for a NULL object + __ br_null(Otos_i, false, Assembler::pt, is_null); + __ delayed()->nop(); + + // Get value klass in RobjKlass + __ load_klass(Otos_i, RobjKlass); // get value klass + + // Get constant pool tag + __ get_2_byte_integer_at_bcp(1, Lscratch, Roffset, InterpreterMacroAssembler::Unsigned); + + // See if the instanceof has been quickened + __ get_cpool_and_tags(Lscratch, G3_scratch); + __ add(G3_scratch, Array<u1>::base_offset_in_bytes(), G3_scratch); + __ ldub(G3_scratch, Roffset, G3_scratch); + __ cmp(G3_scratch, JVM_CONSTANT_Class); + __ br(Assembler::equal, true, Assembler::pt, quicked); + __ 
delayed()->sll(Roffset, LogBytesPerWord, Roffset); + + __ push_ptr(); // save receiver for result, and for GC + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) ); + __ get_vm_result_2(RspecifiedKlass); + __ pop_ptr(Otos_i, G3_scratch); // restore receiver + + __ ba_short(resolved); + + // Extract target class from constant pool + __ bind(quicked); + __ get_constant_pool(Lscratch); + __ load_resolved_klass_at_offset(Lscratch, Roffset, RspecifiedKlass); + + __ bind(resolved); + __ load_klass(Otos_i, RobjKlass); // get value klass + + // Generate a fast subtype check. Branch to done if no + // failure. Return 0 if failure. + __ or3(G0, 1, Otos_i); // set result assuming quick tests succeed + __ gen_subtype_check( RobjKlass, RspecifiedKlass, G3_scratch, G4_scratch, G1_scratch, done ); + // Not a subtype; return 0; + __ clr( Otos_i ); + + if (ProfileInterpreter) { + __ ba_short(done); + } + __ bind(is_null); // Otos_i is 0 on every path reaching here, which is already the result + __ profile_null_seen(G3_scratch); + __ bind(done); +} + +void TemplateTable::_breakpoint() { + + // Note: We get here even if we are single stepping.. + // jbug insists on setting breakpoints at every bytecode + // even if we are in single step mode. 
+ + transition(vtos, vtos); + // get the unpatched byte code + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), Lmethod, Lbcp); + __ mov(O0, Lbyte_code); + + // post the breakpoint event + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), Lmethod, Lbcp); + + // complete the execution of original bytecode + __ dispatch_normal(vtos); +} + + +//---------------------------------------------------------------------------------------------------- +// Exceptions +// athrow: verifies and null-checks the exception oop in Otos_i, then transfers to Interpreter::throw_exception_entry(). +void TemplateTable::athrow() { + transition(atos, vtos); + + // This works because the exception is cached in Otos_i, which is the same as O0, + // which is the same as what throw_exception_entry expects + assert(Otos_i == Oexception, "see explanation above"); + + __ verify_oop(Otos_i); + __ null_check(Otos_i); + __ throw_if_not_x(Assembler::never, Interpreter::throw_exception_entry(), G3_scratch); // ok-condition 'never': the throw path is always taken +} + + +//---------------------------------------------------------------------------------------------------- +// Synchronization + + +// See frame_sparc.hpp for monitor block layout. +// Monitor elements are dynamically allocated by growing stack as needed. +// monitorenter: null-checks the object in Otos_i, picks a free monitor slot (or adds one), stores the object there, locks it and dispatches to the next bytecode. +void TemplateTable::monitorenter() { + transition(atos, vtos); + __ verify_oop(Otos_i); + // Try to acquire a lock on the object + // Repeat until succeeded (i.e., until + // monitorenter returns true). 
+ + { Label ok; + __ tst(Otos_i); + __ throw_if_not_1_x( Assembler::notZero, ok); + __ delayed()->mov(Otos_i, Lscratch); // save obj + __ throw_if_not_2( Interpreter::_throw_NullPointerException_entry, G3_scratch, ok); + } + + assert(O0 == Otos_i, "Be sure where the object to lock is"); + + // find a free slot in the monitor block + + + // initialize entry pointer + __ clr(O1); // points to free slot or NULL + + { + Label entry, loop, exit; + __ add( __ top_most_monitor(), O2 ); // last one to check + __ ba( entry ); + __ delayed()->mov( Lmonitors, O3 ); // first one to check + + + __ bind( loop ); + + __ verify_oop(O4); // verify each monitor's oop + __ tst(O4); // is this entry unused? + __ movcc( Assembler::zero, false, Assembler::ptr_cc, O3, O1); // if unused, remember this entry (O3) as the free slot in O1 + + __ cmp(O4, O0); // check if current entry is for same object + __ brx( Assembler::equal, false, Assembler::pn, exit ); + __ delayed()->inc( O3, frame::interpreter_frame_monitor_size() * wordSize ); // check next one + + __ bind( entry ); + + __ cmp( O3, O2 ); + __ brx( Assembler::lessEqualUnsigned, true, Assembler::pt, loop ); + __ delayed()->ld_ptr(O3, BasicObjectLock::obj_offset_in_bytes(), O4); // annulled branch: the entry's object is loaded only when looping again + + __ bind( exit ); + } + + { Label allocated; + + // found free slot? + __ br_notnull_short(O1, Assembler::pn, allocated); + + __ add_monitor_to_stack( false, O2, O3 ); + __ mov(Lmonitors, O1); // no free slot: use the entry at Lmonitors (assumes add_monitor_to_stack updates Lmonitors -- TODO confirm) + + __ bind(allocated); + } + + // Increment bcp to point to the next bytecode, so exception handling for async. exceptions works correctly. + // The object has already been popped from the stack, so the expression stack looks correct. + __ inc(Lbcp); + + __ st_ptr(O0, O1, BasicObjectLock::obj_offset_in_bytes()); // store object + __ lock_object(O1, O0); + + // check if there's enough space on the stack for the monitors after locking + __ generate_stack_overflow_check(0); + + // The bcp has already been incremented. Just need to dispatch to next instruction. 
+ __ dispatch_next(vtos); +} + + +void TemplateTable::monitorexit() { + transition(atos, vtos); + __ verify_oop(Otos_i); + __ tst(Otos_i); + __ throw_if_not_x( Assembler::notZero, Interpreter::_throw_NullPointerException_entry, G3_scratch ); + + assert(O0 == Otos_i, "just checking"); + + { Label entry, loop, found; + __ add( __ top_most_monitor(), O2 ); // last one to check + __ ba(entry); + // use Lscratch to hold monitor elem to check, start with most recent monitor, + // By using a local it survives the call to the C routine. + __ delayed()->mov( Lmonitors, Lscratch ); + + __ bind( loop ); + + __ verify_oop(O4); // verify each monitor's oop + __ cmp(O4, O0); // check if current entry is for desired object + __ brx( Assembler::equal, true, Assembler::pt, found ); + __ delayed()->mov(Lscratch, O1); // pass found entry as argument to monitorexit + + __ inc( Lscratch, frame::interpreter_frame_monitor_size() * wordSize ); // advance to next + + __ bind( entry ); + + __ cmp( Lscratch, O2 ); + __ brx( Assembler::lessEqualUnsigned, true, Assembler::pt, loop ); + __ delayed()->ld_ptr(Lscratch, BasicObjectLock::obj_offset_in_bytes(), O4); + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); // reached only when no monitor entry matched the object + __ should_not_reach_here(); + + __ bind(found); + } + __ unlock_object(O1); +} + + +//---------------------------------------------------------------------------------------------------- +// Wide instructions +// wide: jumps to the handler stored in the Interpreter::_wentry_point table for the bytecode that follows the wide prefix. +void TemplateTable::wide() { + transition(vtos, vtos); + __ ldub(Lbcp, 1, G3_scratch);// get next bc + __ sll(G3_scratch, LogBytesPerWord, G3_scratch); // scale the bytecode to a word-sized table offset + AddressLiteral ep(Interpreter::_wentry_point); + __ set(ep, G4_scratch); + __ ld_ptr(G4_scratch, G3_scratch, G3_scratch); // load the handler address from the table + __ jmp(G3_scratch, G0); + __ delayed()->nop(); + // Note: the Lbcp increment step is part of the individual wide bytecode implementations +} + + +//---------------------------------------------------------------------------------------------------- 
+// Multi arrays +// multianewarray: passes the address of the first dimension on the expression stack to InterpreterRuntime::multianewarray, then pops all dimensions. +void TemplateTable::multianewarray() { + transition(vtos, atos); + // put ndims * wordSize into Lscratch (ndims is the byte at bcp+3) + __ ldub( Lbcp, 3, Lscratch); + __ sll( Lscratch, Interpreter::logStackElementSize, Lscratch); + // Lesp points past last_dim, so set O1 to the first_dim address + __ add( Lesp, Lscratch, O1); + call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), O1); + __ add( Lesp, Lscratch, Lesp); // pop all dimensions off the stack +} diff -ur --new-file a/src/hotspot/cpu/sparc/templateTable_sparc.hpp b/src/hotspot/cpu/sparc/templateTable_sparc.hpp --- a/src/hotspot/cpu/sparc/templateTable_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/templateTable_sparc.hpp 2023-04-16 11:42:11.078917341 +0000 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SPARC_TEMPLATETABLE_SPARC_HPP +#define CPU_SPARC_TEMPLATETABLE_SPARC_HPP + + static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register ra, // return address + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); + // helper function + static void invokevfinal_helper(Register Rcache, Register Rret); + static void invokeinterface_object_method(Register RKlass, Register Rcall, + Register Rret, + Register Rflags); + static void generate_vtable_call(Register Rrecv, Register Rindex, Register Rret); + static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); + +#endif // CPU_SPARC_TEMPLATETABLE_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/vmStructs_sparc.hpp b/src/hotspot/cpu/sparc/vmStructs_sparc.hpp --- a/src/hotspot/cpu/sparc/vmStructs_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/vmStructs_sparc.hpp 2023-04-16 11:42:11.079085368 +0000 @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_VMSTRUCTS_SPARC_HPP
+#define CPU_SPARC_VMSTRUCTS_SPARC_HPP
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+// Each macro below receives the generator macros as parameters and applies them once per exported entry. VM_STRUCTS_CPU exports the single field JavaFrameAnchor::_flags (volatile int).
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+  volatile_nonstatic_field(JavaFrameAnchor, _flags, int)
+// VM_TYPES_CPU exports nothing: its body is only the empty continuation line that follows it. VM_INT_CONSTANTS_CPU exports the C2 register numbers, then the VM_Version ISA_* and CPU_* feature constants.
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
+  /******************************/ \
+  /* Register numbers (C2 only) */ \
+  /******************************/ \
+  \
+  declare_c2_constant(R_L0_num) \
+  declare_c2_constant(R_L1_num) \
+  declare_c2_constant(R_L2_num) \
+  declare_c2_constant(R_L3_num) \
+  declare_c2_constant(R_L4_num) \
+  declare_c2_constant(R_L5_num) \
+  declare_c2_constant(R_L6_num) \
+  declare_c2_constant(R_L7_num) \
+  declare_c2_constant(R_I0_num) \
+  declare_c2_constant(R_I1_num) \
+  declare_c2_constant(R_I2_num) \
+  declare_c2_constant(R_I3_num) \
+  declare_c2_constant(R_I4_num) \
+  declare_c2_constant(R_I5_num) \
+  declare_c2_constant(R_FP_num) \
+  declare_c2_constant(R_I7_num) \
+  declare_c2_constant(R_O0_num) \
+  declare_c2_constant(R_O1_num) \
+  declare_c2_constant(R_O2_num) \
+  declare_c2_constant(R_O3_num) \
+  declare_c2_constant(R_O4_num) \
+  declare_c2_constant(R_O5_num) \
+  declare_c2_constant(R_SP_num) \
+  declare_c2_constant(R_O7_num) \
+  declare_c2_constant(R_G0_num) \
+  declare_c2_constant(R_G1_num) \
+  declare_c2_constant(R_G2_num) \
+  declare_c2_constant(R_G3_num) \
+  declare_c2_constant(R_G4_num) \
+  declare_c2_constant(R_G5_num) \
+  declare_c2_constant(R_G6_num) \
+  declare_c2_constant(R_G7_num) \
+  declare_constant(VM_Version::ISA_V9) \
+  declare_constant(VM_Version::ISA_POPC) \
+  declare_constant(VM_Version::ISA_VIS1) \
+  declare_constant(VM_Version::ISA_VIS2) \
+  declare_constant(VM_Version::ISA_BLK_INIT) \
+  declare_constant(VM_Version::ISA_FMAF) \
+  declare_constant(VM_Version::ISA_VIS3) \
+  declare_constant(VM_Version::ISA_HPC) \
+  declare_constant(VM_Version::ISA_IMA) \
+  declare_constant(VM_Version::ISA_AES) \
+  declare_constant(VM_Version::ISA_DES) \
+  declare_constant(VM_Version::ISA_KASUMI) \
+  declare_constant(VM_Version::ISA_CAMELLIA) \
+  declare_constant(VM_Version::ISA_MD5) \
+  declare_constant(VM_Version::ISA_SHA1) \
+  declare_constant(VM_Version::ISA_SHA256) \
+  declare_constant(VM_Version::ISA_SHA512) \
+  declare_constant(VM_Version::ISA_MPMUL) \
+  declare_constant(VM_Version::ISA_MONT) \
+  declare_constant(VM_Version::ISA_PAUSE) \
+  declare_constant(VM_Version::ISA_CBCOND) \
+  declare_constant(VM_Version::ISA_CRC32C) \
+  declare_constant(VM_Version::ISA_VIS3B) \
+  declare_constant(VM_Version::ISA_ADI) \
+  declare_constant(VM_Version::ISA_SPARC5) \
+  declare_constant(VM_Version::ISA_MWAIT) \
+  declare_constant(VM_Version::ISA_XMPMUL) \
+  declare_constant(VM_Version::ISA_XMONT) \
+  declare_constant(VM_Version::ISA_PAUSE_NSEC) \
+  declare_constant(VM_Version::ISA_VAMASK) \
+  declare_constant(VM_Version::ISA_SPARC6) \
+  declare_constant(VM_Version::ISA_DICTUNP) \
+  declare_constant(VM_Version::ISA_FPCMPSHL) \
+  declare_constant(VM_Version::ISA_RLE) \
+  declare_constant(VM_Version::ISA_SHA3) \
+  declare_constant(VM_Version::ISA_VIS3C) \
+  declare_constant(VM_Version::ISA_SPARC5B) \
+  declare_constant(VM_Version::ISA_MME) \
+  declare_constant(VM_Version::CPU_FAST_IDIV) \
+  declare_constant(VM_Version::CPU_FAST_RDPC) \
+  declare_constant(VM_Version::CPU_FAST_BIS) \
+  declare_constant(VM_Version::CPU_FAST_LD) \
+  declare_constant(VM_Version::CPU_FAST_CMOVE) \
+  declare_constant(VM_Version::CPU_FAST_IND_BR) \
+  declare_constant(VM_Version::CPU_BLK_ZEROING)
+// No CPU-specific long constants are exported: the macro below has an empty body.
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // CPU_SPARC_VMSTRUCTS_SPARC_HPP
diff -ur --new-file a/src/hotspot/cpu/sparc/vm_version_ext_sparc.cpp b/src/hotspot/cpu/sparc/vm_version_ext_sparc.cpp
--- a/src/hotspot/cpu/sparc/vm_version_ext_sparc.cpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/vm_version_ext_sparc.cpp 2023-04-16 11:42:11.079243696 +0000
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "jvm.h"
+#include "memory/allocation.hpp"
+#include "memory/allocation.inline.hpp"
+#include "vm_version_ext_sparc.hpp"
+
+// VM_Version_Ext statics
+int   VM_Version_Ext::_no_of_threads = 0;
+int   VM_Version_Ext::_no_of_cores = 0;
+int   VM_Version_Ext::_no_of_sockets = 0;
+#if defined(SOLARIS)
+kid_t VM_Version_Ext::_kcid = -1;  // -1 means no kstat chain id has been cached yet (see the 'updated' test below)
+#endif
+char  VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0};
+char  VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0};
+
+#if defined(SOLARIS)
+// get cpu information. It takes into account if the kstat chain id
+// has been changed and update the info if necessary.
+bool VM_Version_Ext::initialize_cpu_information(void) {  // returns false when kstat or sysconf fails, true otherwise (including when nothing needed refreshing)
+
+  int core_id = -1;
+  int chip_id = -1;
+  int len = 0;
+  char* src_string = NULL;
+  kstat_ctl_t* kc = kstat_open();
+  if (!kc) {
+    return false;
+  }
+
+  // check if kstat chain has been updated
+  kid_t kcid = kstat_chain_update(kc);
+  if (kcid == -1) {
+    kstat_close(kc);
+    return false;
+  }
+
+  // Refresh when a new non-zero chain id is reported, or on the first call (kcid == 0 while nothing is cached).
+  bool updated = ((kcid > 0) && (kcid != _kcid)) ||
+                 ((kcid == 0) && (_kcid == -1));
+  if (!updated) {
+    kstat_close(kc);
+    return true;
+  }
+
+  // update the cached _kcid. NOTE(review): the id is cached before the scan below; if a later step fails and returns false, the next call can take the '!updated' path and return true with incomplete counts - confirm this is intended.
+  _kcid = kcid;
+
+  // find the number of online processors
+  // for modern processors, it is also known as the
+  // hardware threads.
+  _no_of_threads  = sysconf(_SC_NPROCESSORS_ONLN);
+
+  if (_no_of_threads <= 0 ) {
+    kstat_close(kc);
+    return false;
+  }
+
+  _no_of_cores = 0;
+  _no_of_sockets = 0;
+
+  // loop through the kstat chain
+  kstat_t* ksp = NULL;
+  for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
+    // only interested in "cpu_info"
+    if (strcmp(ksp->ks_module, (char*)CPU_INFO) == 0) {
+      if (kstat_read(kc, ksp, NULL) == -1) {
+        kstat_close(kc);
+        return false;
+      }
+      if (ksp->ks_data != NULL) {
+        kstat_named_t* knm = (kstat_named_t *)ksp->ks_data;
+        // loop through the number of fields in each record
+        for (int i = 0; i < ksp->ks_ndata; i++) {
+          // set cpu type if it hasn't been already set
+          if ((strcmp((const char*)&(knm[i].name), CPU_TYPE) == 0) &&
+                     (_cpu_name[0] == '\0')) {
+            if (knm[i].data_type == KSTAT_DATA_STRING) {
+              src_string = (char*)KSTAT_NAMED_STR_PTR(&knm[i]);
+            } else {
+              src_string = (char*)&(knm[i].value.c[0]);
+            }
+            len = strlen(src_string);
+            if (len < CPU_TYPE_DESC_BUF_SIZE) {  // values that do not fit are skipped, not truncated
+              jio_snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE,
+                                         "%s", src_string);
+            }
+          }
+
+          // set cpu description if it hasn't been already set
+          if ((strcmp((const char*)&(knm[i].name), CPU_DESCRIPTION) == 0) &&
+                      (_cpu_desc[0] == '\0')) {
+            if (knm[i].data_type == KSTAT_DATA_STRING) {
+              src_string = (char*)KSTAT_NAMED_STR_PTR(&knm[i]);
+            } else {
+              src_string = (char*)&(knm[i].value.c[0]);
+            }
+            len = strlen(src_string);
+            if (len < CPU_DETAILED_DESC_BUF_SIZE) {  // values that do not fit are skipped, not truncated
+              jio_snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE,
+                                         "%s", src_string);
+            }
+          }
+
+          // count the number of sockets based on the chip id
+          if (strcmp((const char*)&(knm[i].name), CHIP_ID) == 0) {
+            if (chip_id != knm[i].value.l) {  // counts every change from the previously seen id; presumably records for one chip are adjacent - confirm
+              chip_id = knm[i].value.l;
+              _no_of_sockets++;
+            }
+          }
+
+          // count the number of cores based on the core id
+          if (strcmp((const char*)&(knm[i].name), CORE_ID) == 0) {
+            if (core_id != knm[i].value.l) {  // same change-counting scheme as for chip_id above
+              core_id = knm[i].value.l;
+              _no_of_cores++;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  kstat_close(kc);
+  return true;
+}
+#elif defined(LINUX)
+// get cpu information.
+bool VM_Version_Ext::initialize_cpu_information(void) {
+  // Not yet implemented.
+  return false;
+}
+#endif
+
+int VM_Version_Ext::number_of_threads(void) {  // refreshes the cached information (result ignored), then returns the cached thread count
+  initialize_cpu_information();
+  return _no_of_threads;
+}
+
+int VM_Version_Ext::number_of_cores(void) {  // refreshes the cached information (result ignored), then returns the cached core count
+  initialize_cpu_information();
+  return _no_of_cores;
+}
+
+int VM_Version_Ext::number_of_sockets(void) {  // refreshes the cached information (result ignored), then returns the cached socket count
+  initialize_cpu_information();
+  return _no_of_sockets;
+}
+
+const char* VM_Version_Ext::cpu_name(void) {  // returns a fresh C-heap copy of _cpu_name, or NULL if initialization or allocation fails; presumably the caller frees it - confirm
+  if (!initialize_cpu_information()) {
+    return NULL;
+  }
+  char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing);
+  if (NULL == tmp) {
+    return NULL;
+  }
+  strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE);
+  return tmp;
+}
+
+const char* VM_Version_Ext::cpu_description(void) {  // returns a fresh C-heap copy of _cpu_desc, or NULL if initialization or allocation fails; presumably the caller frees it - confirm
+  if (!initialize_cpu_information()) {
+    return NULL;
+  }
+  char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing);
+  if (NULL == tmp) {
+    return NULL;
+  }
+  strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
+  return tmp;
+}
diff -ur --new-file a/src/hotspot/cpu/sparc/vm_version_ext_sparc.hpp b/src/hotspot/cpu/sparc/vm_version_ext_sparc.hpp
--- a/src/hotspot/cpu/sparc/vm_version_ext_sparc.hpp 1970-01-01 01:00:00.000000000 +0000
+++ b/src/hotspot/cpu/sparc/vm_version_ext_sparc.hpp 2023-04-16 11:42:11.079359103 +0000
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2013, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_VM_VERSION_EXT_SPARC_HPP +#define CPU_SPARC_VM_VERSION_EXT_SPARC_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +#if defined(SOLARIS) +#include +#include +#endif + +#define CPU_INFO "cpu_info" +#define CPU_TYPE "fpu_type" +#define CPU_DESCRIPTION "implementation" +#define CHIP_ID "chip_id" +#define CORE_ID "core_id" + +class VM_Version_Ext : public VM_Version { + private: + + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; +#if defined(SOLARIS) + static kid_t _kcid; +#endif + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + static bool initialize_cpu_information(void); + + public: + + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); +}; + +#endif // CPU_SPARC_VM_VERSION_EXT_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/vm_version_sparc.cpp b/src/hotspot/cpu/sparc/vm_version_sparc.cpp --- a/src/hotspot/cpu/sparc/vm_version_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/vm_version_sparc.cpp 2023-04-16 11:42:11.079624386 +0000 @@ -0,0 +1,524 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/macroAssembler.inline.hpp" +#include "logging/log.hpp" +#include "logging/logStream.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compressedOops.hpp" +#include "runtime/java.hpp" +#include "runtime/os.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/vm_version.hpp" + +#include + +uint VM_Version::_L2_data_cache_line_size = 0; + +void VM_Version::initialize() { + assert(_features != 0, "System pre-initialization is not complete."); + guarantee(VM_Version::has_v9(), "only SPARC v9 is supported"); + + PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); + PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); + PrefetchFieldsAhead = prefetch_fields_ahead(); + + // Allocation prefetch settings + + AllocatePrefetchDistance = allocate_prefetch_distance(); + AllocatePrefetchStyle = allocate_prefetch_style(); + + intx cache_line_size = prefetch_data_size(); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + AllocatePrefetchStepSize = MAX2(AllocatePrefetchStepSize, cache_line_size); + } + + if (AllocatePrefetchInstr == 1) { + if (!has_blk_init()) { + warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable"); + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); + } + if (cache_line_size <= 0) { + warning("Cache-line size must be known for AllocatePrefetchInstr 1 to work"); + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); + } + } + + _supports_cx8 = true; // All SPARC V9 implementations. + _supports_atomic_getset4 = true; // Using the 'swap' instruction. + + if (has_fast_ind_br() && FLAG_IS_DEFAULT(UseInlineCaches)) { + // Indirect and direct branches are cost equivalent. + FLAG_SET_DEFAULT(UseInlineCaches, false); + } + // Align loops on the proper instruction boundary to fill the instruction + // fetch buffer. 
+ if (FLAG_IS_DEFAULT(OptoLoopAlignment)) { + FLAG_SET_DEFAULT(OptoLoopAlignment, VM_Version::insn_fetch_alignment); + } + + // 32-bit oops don't make sense for the 64-bit VM on SPARC since the 32-bit + // VM has the same registers and smaller objects. + CompressedOops::set_shift(LogMinObjAlignmentInBytes); + CompressedKlassPointers::set_shift(LogKlassAlignmentInBytes); + +#ifdef COMPILER2 + if (has_fast_ind_br() && FLAG_IS_DEFAULT(UseJumpTables)) { + // Indirect and direct branches are cost equivalent. + FLAG_SET_DEFAULT(UseJumpTables, true); + } + // Entry and loop tops are aligned to fill the instruction fetch buffer. + if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) { + FLAG_SET_DEFAULT(InteriorEntryAlignment, VM_Version::insn_fetch_alignment); + } + if (UseTLAB && cache_line_size > 0 && + FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { + if (has_fast_bis()) { + // Use BIS instruction for TLAB allocation prefetch. + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1); + } + else if (has_sparc5()) { + // Use prefetch instruction to avoid partial RAW issue on Core C4 processors, + // also use prefetch style 3. + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3); + } + } + } + if (AllocatePrefetchInstr == 1) { + // Use allocation prefetch style 3 because BIS instructions require + // aligned memory addresses. + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3); + } + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + if (AllocatePrefetchInstr == 0) { + // Use different prefetch distance without BIS + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + } else { + // Use smaller prefetch distance with BIS + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); + } + } + + // We increase the number of prefetched cache lines, to use just a bit more + // aggressive approach, when the L2-cache line size is small (32 bytes), or + // when running on newer processor implementations, such as the Core C4. 
+ bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5()); + + if (inc_prefetch) { + // We use a factor two for small cache line sizes (as before) but a slightly + // more conservative increase when running on more recent hardware that will + // benefit from just a bit more aggressive prefetching. + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + const int ap_lns = AllocatePrefetchLines; + const int ap_inc = cache_line_size < 64 ? ap_lns : (ap_lns + 1) / 2; + FLAG_SET_ERGO(AllocatePrefetchLines, ap_lns + ap_inc); + } + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + const int ip_lns = AllocateInstancePrefetchLines; + const int ip_inc = cache_line_size < 64 ? ip_lns : (ip_lns + 1) / 2; + FLAG_SET_ERGO(AllocateInstancePrefetchLines, ip_lns + ip_inc); + } + } +#endif /* COMPILER2 */ + + // Use hardware population count instruction if available. + if (has_popc()) { + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } + } else if (UsePopCountInstruction) { + warning("POPC instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } + + // Use compare and branch instructions if available. + if (has_cbcond()) { + if (FLAG_IS_DEFAULT(UseCBCond)) { + FLAG_SET_DEFAULT(UseCBCond, true); + } + } else if (UseCBCond) { + warning("CBCOND instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCBCond, false); + } + + // Use 'mpmul' instruction if available. 
+ if (has_mpmul()) { + if (FLAG_IS_DEFAULT(UseMPMUL)) { + FLAG_SET_DEFAULT(UseMPMUL, true); + } + } else if (UseMPMUL) { + warning("MPMUL instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseMPMUL, false); + } + + assert(BlockZeroingLowLimit > 0, "invalid value"); + + if (has_blk_zeroing() && cache_line_size > 0) { + if (FLAG_IS_DEFAULT(UseBlockZeroing)) { + FLAG_SET_DEFAULT(UseBlockZeroing, true); + } + } else if (UseBlockZeroing) { + warning("BIS zeroing instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseBlockZeroing, false); + } + + assert(BlockCopyLowLimit > 0, "invalid value"); + + if (has_blk_zeroing() && cache_line_size > 0) { + if (FLAG_IS_DEFAULT(UseBlockCopy)) { + FLAG_SET_DEFAULT(UseBlockCopy, true); + } + } else if (UseBlockCopy) { + warning("BIS instructions are not available or expensive on this CPU"); + FLAG_SET_DEFAULT(UseBlockCopy, false); + } + +#ifdef COMPILER2 + // Currently not supported anywhere. + FLAG_SET_DEFAULT(UseFPUForSpilling, false); + + MaxVectorSize = 8; + + assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); +#endif + + assert((CodeEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); + assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); + + char buf[512]; + jio_snprintf(buf, sizeof(buf), + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s" + "%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s" + "%s%s%s%s%s%s%s", + (has_v9() ? "v9" : ""), + (has_popc() ? ", popc" : ""), + (has_vis1() ? ", vis1" : ""), + (has_vis2() ? ", vis2" : ""), + (has_blk_init() ? ", blk_init" : ""), + (has_fmaf() ? ", fmaf" : ""), + (has_hpc() ? ", hpc" : ""), + (has_ima() ? ", ima" : ""), + (has_aes() ? ", aes" : ""), + (has_des() ? ", des" : ""), + (has_kasumi() ? ", kas" : ""), + (has_camellia() ? ", cam" : ""), + (has_md5() ? ", md5" : ""), + (has_sha1() ? ", sha1" : ""), + (has_sha256() ? 
", sha256" : ""), + (has_sha512() ? ", sha512" : ""), + (has_mpmul() ? ", mpmul" : ""), + (has_mont() ? ", mont" : ""), + (has_pause() ? ", pause" : ""), + (has_cbcond() ? ", cbcond" : ""), + (has_crc32c() ? ", crc32c" : ""), + + (has_athena_plus() ? ", athena_plus" : ""), + (has_vis3b() ? ", vis3b" : ""), + (has_adi() ? ", adi" : ""), + (has_sparc5() ? ", sparc5" : ""), + (has_mwait() ? ", mwait" : ""), + (has_xmpmul() ? ", xmpmul" : ""), + (has_xmont() ? ", xmont" : ""), + (has_pause_nsec() ? ", pause_nsec" : ""), + (has_vamask() ? ", vamask" : ""), + + (has_sparc6() ? ", sparc6" : ""), + (has_dictunp() ? ", dictunp" : ""), + (has_fpcmpshl() ? ", fpcmpshl" : ""), + (has_rle() ? ", rle" : ""), + (has_sha3() ? ", sha3" : ""), + (has_athena_plus2()? ", athena_plus2" : ""), + (has_vis3c() ? ", vis3c" : ""), + (has_sparc5b() ? ", sparc5b" : ""), + (has_mme() ? ", mme" : ""), + + (has_fast_idiv() ? ", *idiv" : ""), + (has_fast_rdpc() ? ", *rdpc" : ""), + (has_fast_bis() ? ", *bis" : ""), + (has_fast_ld() ? ", *ld" : ""), + (has_fast_cmove() ? ", *cmove" : ""), + (has_fast_ind_br() ? ", *ind_br" : ""), + (has_blk_zeroing() ? ", *blk_zeroing" : "")); + + assert(strlen(buf) >= 2, "must be"); + + _features_string = os::strdup(buf); + + log_info(os, cpu)("SPARC features detected: %s", _features_string); + + // UseVIS is set to the smallest of what hardware supports and what the command + // line requires, i.e. you cannot set UseVIS to 3 on older UltraSparc which do + // not support it. 
+ + if (UseVIS > 3) UseVIS = 3; + if (UseVIS < 0) UseVIS = 0; + if (!has_vis3()) // Drop to 2 if no VIS3 support + UseVIS = MIN2((intx)2, UseVIS); + if (!has_vis2()) // Drop to 1 if no VIS2 support + UseVIS = MIN2((intx)1, UseVIS); + if (!has_vis1()) // Drop to 0 if no VIS1 support + UseVIS = 0; + + if (has_aes()) { + if (FLAG_IS_DEFAULT(UseAES)) { + FLAG_SET_DEFAULT(UseAES, true); + } + if (!UseAES) { + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } else { + // The AES intrinsic stubs require AES instruction support (of course) + // but also require VIS3 mode or higher for instructions it use. + if (UseVIS > 2) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + } else { + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("SPARC AES intrinsics require VIS3 instructions. Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + } else if (UseAES || UseAESIntrinsics) { + if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + + if (UseAESCTRIntrinsics) { + warning("AES/CTR intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } + + // GHASH/GCM intrinsics + if (has_vis3() && (UseVIS > 2)) { + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { + UseGHASHIntrinsics = true; + } + } else if (UseGHASHIntrinsics) { + if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) + warning("GHASH intrinsics require VIS3 instruction support. 
Intrinsics will be disabled"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + + if (has_fmaf()) { + if (FLAG_IS_DEFAULT(UseFMA)) { + UseFMA = true; + } + } else if (UseFMA) { + warning("FMA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseFMA, false); + } + + // SHA1, SHA256, and SHA512 instructions were added to SPARC at different times + if (has_sha1() || has_sha256() || has_sha512()) { + if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions + if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + } else { + if (UseSHA) { + warning("SPARC SHA intrinsics require VIS1 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseSHA, false); + } + } + } else if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA && has_sha1()) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + + if (UseSHA && has_sha256()) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + + if (UseSHA && has_sha512()) { + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); + } + } else if (UseSHA512Intrinsics) { + warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + // disable by default, is available on the M8 processor? 
+ if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); + } + + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (has_crc32c()) { + if (UseVIS > 2) { // CRC32C intrinsics use VIS3 instructions + if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); + } + } else { + if (UseCRC32CIntrinsics) { + warning("SPARC CRC32C intrinsics require VIS3 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + } + } else if (UseCRC32CIntrinsics) { + warning("CRC32C instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + + if (UseVIS > 2) { + if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { + FLAG_SET_DEFAULT(UseAdler32Intrinsics, true); + } + } else if (UseAdler32Intrinsics) { + warning("SPARC Adler32 intrinsics require VIS3 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + } + + if (UseVIS > 2) { + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); + } + } else if (UseCRC32Intrinsics) { + warning("SPARC CRC32 intrinsics require VIS3 instructions support. Intrinsics will be disabled"); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + + if (UseVIS > 2) { + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } + } else if (UseMultiplyToLenIntrinsic) { + warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. 
Intrinsics will be disabled"); + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); + } + + if (UseVectorizedMismatchIntrinsic) { + warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU."); + FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); + } + + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && + (cache_line_size > ContendedPaddingWidth)) + ContendedPaddingWidth = cache_line_size; + + // This machine does not allow unaligned memory accesses + if (UseUnalignedAccesses) { + if (!FLAG_IS_DEFAULT(UseUnalignedAccesses)) + warning("Unaligned memory access is not available on this CPU"); + FLAG_SET_DEFAULT(UseUnalignedAccesses, false); + } + + if (log_is_enabled(Info, os, cpu)) { + ResourceMark rm; + LogStream ls(Log(os, cpu)::info()); + outputStream* log = &ls; + log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); + log->print_cr("L2 data cache line size: %u", L2_data_cache_line_size()); + log->print("Allocation"); + if (AllocatePrefetchStyle <= 0) { + log->print(": no prefetching"); + } else { + log->print(" prefetching: "); + if (AllocatePrefetchInstr == 0) { + log->print("PREFETCH"); + } else if (AllocatePrefetchInstr == 1) { + log->print("BIS"); + } + if (AllocatePrefetchLines > 1) { + log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize); + } else { + log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize); + } + } + if (PrefetchCopyIntervalInBytes > 0) { + log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); + } + if (PrefetchScanIntervalInBytes > 0) { + log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); + } + if (PrefetchFieldsAhead > 0) { + log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead); + } + if (ContendedPaddingWidth > 0) { + log->print_cr("ContendedPaddingWidth %d", 
(int) ContendedPaddingWidth); + } + } +} + +void VM_Version::print_features() { /* Print the raw _features bitmask in hex, then _features_string when it is set. */ + tty->print("ISA features [0x%0" PRIx64 "]:", _features); + if (_features_string != NULL) { + tty->print(" %s", _features_string); + } + tty->cr(); +} + +void VM_Version::determine_features() { /* Call the os_arch-specific probe, check that V9 is reported, then optionally mask _features down to niagara1_msk. */ + platform_features(); // platform_features() is os_arch specific. + + assert(has_v9(), "must be"); + + if (UseNiagaraInstrs) { // Limit code generation to Niagara: keep only the bits in niagara1_msk. + _features &= niagara1_msk; + } +} + +static uint64_t saved_features = 0; /* _features as captured by the last allow_all(); revert() writes it back (still 0 if allow_all() never ran). */ + +void VM_Version::allow_all() { /* Save the current _features, then claim every feature bit (full_feature_msk). */ + saved_features = _features; + _features = full_feature_msk; +} + +void VM_Version::revert() { /* Undo allow_all() by restoring the saved _features. */ + _features = saved_features; +} diff -ur --new-file a/src/hotspot/cpu/sparc/vm_version_sparc.hpp b/src/hotspot/cpu/sparc/vm_version_sparc.hpp --- a/src/hotspot/cpu/sparc/vm_version_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/vm_version_sparc.hpp 2023-04-16 11:42:11.079839225 +0000 @@ -0,0 +1,376 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_VM_VERSION_SPARC_HPP +#define CPU_SPARC_VM_VERSION_SPARC_HPP + +#include "runtime/abstract_vm_version.hpp" +#include "runtime/globals_extension.hpp" + +class VM_Version: public Abstract_VM_Version { /* SPARC feature bits kept in _features, per-core feature masks, has_*() queries and prefetch policy helpers. */ + friend class VMStructs; + friend class JVMCIVMStructs; + +protected: + enum { /* Bit index of each feature within _features; the *_msk enumerators below are 1 << index. */ + ISA_V9, + ISA_POPC, + ISA_VIS1, + ISA_VIS2, + ISA_BLK_INIT, + ISA_FMAF, + ISA_VIS3, + ISA_HPC, + ISA_IMA, + ISA_AES, + ISA_DES, + ISA_KASUMI, + ISA_CAMELLIA, + ISA_MD5, + ISA_SHA1, + ISA_SHA256, + ISA_SHA512, + ISA_MPMUL, + ISA_MONT, + ISA_PAUSE, + ISA_CBCOND, + ISA_CRC32C, + + ISA_FJATHPLUS, + ISA_VIS3B, + ISA_ADI, + ISA_SPARC5, + ISA_MWAIT, + ISA_XMPMUL, + ISA_XMONT, + ISA_PAUSE_NSEC, + ISA_VAMASK, + + ISA_SPARC6, + ISA_DICTUNP, + ISA_FPCMPSHL, + ISA_RLE, + ISA_SHA3, + ISA_FJATHPLUS2, + ISA_VIS3C, + ISA_SPARC5B, + ISA_MME, + + // Synthesised properties: + + CPU_FAST_IDIV, + CPU_FAST_RDPC, + CPU_FAST_BIS, + CPU_FAST_LD, + CPU_FAST_CMOVE, + CPU_FAST_IND_BR, + CPU_BLK_ZEROING + }; + +private: + enum { ISA_last_feature = ISA_MME, + CPU_last_feature = CPU_BLK_ZEROING }; + + enum { + ISA_unknown_msk = 0, + + ISA_v9_msk = UINT64_C(1) << ISA_V9, + + ISA_popc_msk = UINT64_C(1) << ISA_POPC, + ISA_vis1_msk = UINT64_C(1) << ISA_VIS1, + ISA_vis2_msk = UINT64_C(1) << ISA_VIS2, + ISA_blk_init_msk = UINT64_C(1) << ISA_BLK_INIT, + ISA_fmaf_msk = UINT64_C(1) << ISA_FMAF, + ISA_vis3_msk = UINT64_C(1) << ISA_VIS3, + ISA_hpc_msk = UINT64_C(1) << ISA_HPC, + ISA_ima_msk = UINT64_C(1) << ISA_IMA, + ISA_aes_msk = UINT64_C(1) << ISA_AES, + ISA_des_msk = UINT64_C(1) << ISA_DES, + ISA_kasumi_msk = UINT64_C(1) << ISA_KASUMI, + ISA_camellia_msk = UINT64_C(1) << ISA_CAMELLIA, + ISA_md5_msk = UINT64_C(1) << ISA_MD5, + ISA_sha1_msk = UINT64_C(1) << ISA_SHA1, + ISA_sha256_msk = UINT64_C(1) << ISA_SHA256, + ISA_sha512_msk = 
UINT64_C(1) << ISA_SHA512, + ISA_mpmul_msk = UINT64_C(1) << ISA_MPMUL, + ISA_mont_msk = UINT64_C(1) << ISA_MONT, + ISA_pause_msk = UINT64_C(1) << ISA_PAUSE, + ISA_cbcond_msk = UINT64_C(1) << ISA_CBCOND, + ISA_crc32c_msk = UINT64_C(1) << ISA_CRC32C, + + ISA_fjathplus_msk = UINT64_C(1) << ISA_FJATHPLUS, + ISA_vis3b_msk = UINT64_C(1) << ISA_VIS3B, + ISA_adi_msk = UINT64_C(1) << ISA_ADI, + ISA_sparc5_msk = UINT64_C(1) << ISA_SPARC5, + ISA_mwait_msk = UINT64_C(1) << ISA_MWAIT, + ISA_xmpmul_msk = UINT64_C(1) << ISA_XMPMUL, + ISA_xmont_msk = UINT64_C(1) << ISA_XMONT, + ISA_pause_nsec_msk = UINT64_C(1) << ISA_PAUSE_NSEC, + ISA_vamask_msk = UINT64_C(1) << ISA_VAMASK, + + ISA_sparc6_msk = UINT64_C(1) << ISA_SPARC6, + ISA_dictunp_msk = UINT64_C(1) << ISA_DICTUNP, + ISA_fpcmpshl_msk = UINT64_C(1) << ISA_FPCMPSHL, + ISA_rle_msk = UINT64_C(1) << ISA_RLE, + ISA_sha3_msk = UINT64_C(1) << ISA_SHA3, + ISA_fjathplus2_msk = UINT64_C(1) << ISA_FJATHPLUS2, + ISA_vis3c_msk = UINT64_C(1) << ISA_VIS3C, + ISA_sparc5b_msk = UINT64_C(1) << ISA_SPARC5B, + ISA_mme_msk = UINT64_C(1) << ISA_MME, + + CPU_fast_idiv_msk = UINT64_C(1) << CPU_FAST_IDIV, + CPU_fast_rdpc_msk = UINT64_C(1) << CPU_FAST_RDPC, + CPU_fast_bis_msk = UINT64_C(1) << CPU_FAST_BIS, + CPU_fast_ld_msk = UINT64_C(1) << CPU_FAST_LD, + CPU_fast_cmove_msk = UINT64_C(1) << CPU_FAST_CMOVE, + CPU_fast_ind_br_msk = UINT64_C(1) << CPU_FAST_IND_BR, + CPU_blk_zeroing_msk = UINT64_C(1) << CPU_BLK_ZEROING, + + last_feature_msk = CPU_blk_zeroing_msk, + full_feature_msk = (last_feature_msk << 1) - 1 + }; + +/* The following, previously supported, SPARC implementations are no longer + * supported. + * + * UltraSPARC I/II: + * SPARC-V9, VIS + * UltraSPARC III/+: (Cheetah/+) + * SPARC-V9, VIS + * UltraSPARC IV: (Jaguar) + * SPARC-V9, VIS + * UltraSPARC IV+: (Panther) + * SPARC-V9, VIS, POPC + * + * The currently supported SPARC implementations are listed below (including + * generic V9 support). 
+ * + * UltraSPARC T1: (Niagara) + * SPARC-V9, VIS, ASI_BIS (Crypto/hash in SPU) + * UltraSPARC T2: (Niagara-2) + * SPARC-V9, VIS, ASI_BIS, POPC (Crypto/hash in SPU) + * UltraSPARC T2+: (Victoria Falls, etc.) + * SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU) + * + * UltraSPARC T3: (Rainbow Falls/C2) + * SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU) + * + * Oracle SPARC T4/T5/M5: (Core C3) + * SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND, + * AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL + * + * Oracle SPARC M7: (Core C4) + * SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND, + * AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b, + * ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK + * + * Oracle SPARC M8: (Core C5) + * SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND, + * AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b, + * ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL, + * DICTUNP, RLE, SHA3, MME + * + * NOTE: Oracle Number support ignored. 
+ */ + enum { + niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk, + niagara2_msk = niagara1_msk | ISA_popc_msk, + + core_C2_msk = niagara2_msk | ISA_vis2_msk, + + core_C3_msk = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk | + ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk | + ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk | + ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk | + ISA_cbcond_msk | ISA_crc32c_msk, + + core_C4_msk = (core_C3_msk & ~ISA_kasumi_msk) | /* C4 has no Kasumi: clear the bit explicitly rather than subtracting it and relying on '-' binding tighter than '|'. */ + ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk | + ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk, + + core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk | + ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk, + + ultra_sparc_t1_msk = niagara1_msk, + ultra_sparc_t2_msk = niagara2_msk, + ultra_sparc_t3_msk = core_C2_msk, + ultra_sparc_m5_msk = core_C3_msk, // NOTE: First out-of-order pipeline. + ultra_sparc_m7_msk = core_C4_msk, + ultra_sparc_m8_msk = core_C5_msk + }; + + static uint _L2_data_cache_line_size; + static uint L2_data_cache_line_size() { return _L2_data_cache_line_size; } + + static void determine_features(); + static void platform_features(); + static void print_features(); + +public: + enum { + // Adopt a conservative behaviour (modelling single-insn-fetch-n-issue) for + // Niagara (and SPARC64). While there are at least two entries/slots in the + // instruction fetch buffer on any Niagara core (and as many as eight on a + // SPARC64), the performance improvement from keeping hot branch targets on + // optimally aligned addresses is such a small one (if any) that we choose + // not to use the extra code space required. + + insn_fetch_alignment = 4 // Byte alignment in L1 insn. cache. 
+ }; + + static void initialize(); + + static void init_before_ergo() { determine_features(); } + + // Instruction feature support: + + static bool has_v9() { return (_features & ISA_v9_msk) != 0; } + static bool has_popc() { return (_features & ISA_popc_msk) != 0; } + static bool has_vis1() { return (_features & ISA_vis1_msk) != 0; } + static bool has_vis2() { return (_features & ISA_vis2_msk) != 0; } + static bool has_blk_init() { return (_features & ISA_blk_init_msk) != 0; } + static bool has_fmaf() { return (_features & ISA_fmaf_msk) != 0; } + static bool has_vis3() { return (_features & ISA_vis3_msk) != 0; } + static bool has_hpc() { return (_features & ISA_hpc_msk) != 0; } + static bool has_ima() { return (_features & ISA_ima_msk) != 0; } + static bool has_aes() { return (_features & ISA_aes_msk) != 0; } + static bool has_des() { return (_features & ISA_des_msk) != 0; } + static bool has_kasumi() { return (_features & ISA_kasumi_msk) != 0; } + static bool has_camellia() { return (_features & ISA_camellia_msk) != 0; } + static bool has_md5() { return (_features & ISA_md5_msk) != 0; } + static bool has_sha1() { return (_features & ISA_sha1_msk) != 0; } + static bool has_sha256() { return (_features & ISA_sha256_msk) != 0; } + static bool has_sha512() { return (_features & ISA_sha512_msk) != 0; } + static bool has_mpmul() { return (_features & ISA_mpmul_msk) != 0; } + static bool has_mont() { return (_features & ISA_mont_msk) != 0; } + static bool has_pause() { return (_features & ISA_pause_msk) != 0; } + static bool has_cbcond() { return (_features & ISA_cbcond_msk) != 0; } + static bool has_crc32c() { return (_features & ISA_crc32c_msk) != 0; } + + static bool has_athena_plus() { return (_features & ISA_fjathplus_msk) != 0; } + static bool has_vis3b() { return (_features & ISA_vis3b_msk) != 0; } + static bool has_adi() { return (_features & ISA_adi_msk) != 0; } + static bool has_sparc5() { return (_features & ISA_sparc5_msk) != 0; } + static bool has_mwait() { 
return (_features & ISA_mwait_msk) != 0; } + static bool has_xmpmul() { return (_features & ISA_xmpmul_msk) != 0; } + static bool has_xmont() { return (_features & ISA_xmont_msk) != 0; } + static bool has_pause_nsec() { return (_features & ISA_pause_nsec_msk) != 0; } + static bool has_vamask() { return (_features & ISA_vamask_msk) != 0; } + + static bool has_sparc6() { return (_features & ISA_sparc6_msk) != 0; } + static bool has_dictunp() { return (_features & ISA_dictunp_msk) != 0; } + static bool has_fpcmpshl() { return (_features & ISA_fpcmpshl_msk) != 0; } + static bool has_rle() { return (_features & ISA_rle_msk) != 0; } + static bool has_sha3() { return (_features & ISA_sha3_msk) != 0; } + static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; } + static bool has_vis3c() { return (_features & ISA_vis3c_msk) != 0; } + static bool has_sparc5b() { return (_features & ISA_sparc5b_msk) != 0; } + static bool has_mme() { return (_features & ISA_mme_msk) != 0; } + + static bool has_fast_idiv() { return (_features & CPU_fast_idiv_msk) != 0; } + static bool has_fast_rdpc() { return (_features & CPU_fast_rdpc_msk) != 0; } + static bool has_fast_bis() { return (_features & CPU_fast_bis_msk) != 0; } + static bool has_fast_ld() { return (_features & CPU_fast_ld_msk) != 0; } + static bool has_fast_cmove() { return (_features & CPU_fast_cmove_msk) != 0; } + + // If indirect and direct branching is equally fast. + static bool has_fast_ind_br() { return (_features & CPU_fast_ind_br_msk) != 0; } + // If SPARC BIS to the beginning of cache line always zeros it. + static bool has_blk_zeroing() { return (_features & CPU_blk_zeroing_msk) != 0; } + + static bool supports_compare_and_exchange() { return true; } + + // FIXME: To be removed. + static bool is_post_niagara() { + return (_features & niagara2_msk) == niagara2_msk; + } + + // Default prefetch block size on SPARC. 
+ static uint prefetch_data_size() { return L2_data_cache_line_size(); } + + private: + // Prefetch policy and characteristics: + // + // These support routines are used in order to isolate any CPU/core specific + // logic from the actual flag/option processing. They should reflect the HW + // characteristics for the associated options on the current platform. + // + // The three Prefetch* options below (assigned -1 in the configuration) are + // treated according to (given the accepted range [-1..]): + // -1: Determine a proper HW-specific value for the current HW. + // 0: Off + // >0: Command-line supplied value to use. + // + // FIXME: The documentation string in the configuration is wrong, saying that + // -1 is also interpreted as off. + // + static intx prefetch_copy_interval_in_bytes() { + intx bytes = PrefetchCopyIntervalInBytes; + return bytes < 0 ? 512 : bytes; + } + static intx prefetch_scan_interval_in_bytes() { + intx bytes = PrefetchScanIntervalInBytes; + return bytes < 0 ? 512 : bytes; + } + static intx prefetch_fields_ahead() { + intx count = PrefetchFieldsAhead; + return count < 0 ? 0 : count; + } + + // AllocatePrefetchDistance is treated under the same interpretation as the + // Prefetch* options above (i.e., -1, 0, >0). + static intx allocate_prefetch_distance() { + intx count = AllocatePrefetchDistance; + return count < 0 ? 512 : count; + } + + // AllocatePrefetchStyle is guaranteed to be in range [0..3] defined by the + // configuration. + static intx allocate_prefetch_style() { + intx distance = allocate_prefetch_distance(); + // Return 0 (off/none) if AllocatePrefetchDistance was not defined. + return distance > 0 ? AllocatePrefetchStyle : 0; + } + + public: + // Assembler testing + static void allow_all(); + static void revert(); + + // Override the Abstract_VM_Version implementation. + // + // FIXME: Removed broken test on sun4v (always false when invoked prior to the + // proper capability setup), thus always returning 2. 
Still need to fix + // this properly in order to enable complete page size support. + static uint page_size_count() { return 2; } + + // Override default denominator for ParallelGCThreads. + // + // FIXME: Simply checking the processor family is insufficient. + static uint parallel_worker_threads_denominator() { + return is_post_niagara() ? 16 : 8; + } +}; + +#endif // CPU_SPARC_VM_VERSION_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/vmreg_sparc.cpp b/src/hotspot/cpu/sparc/vmreg_sparc.cpp --- a/src/hotspot/cpu/sparc/vmreg_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/vmreg_sparc.cpp 2023-04-16 11:42:11.079939781 +0000 @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { /* Fill regName[] by VMReg index: GPR names first (two consecutive slots each), then FPR names, then a placeholder for every remaining slot. */ + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++ ] = reg->name(); + regName[i++ ] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + if (freg->encoding() > 31) { /* FPRs with an encoding above 31 take a second slot */ + regName[i++] = freg->name(); + } + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} + +VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { /* Not implemented for SPARC: calls Unimplemented() and returns VMRegImpl::Bad(); both arguments are ignored. */ + Unimplemented(); + return VMRegImpl::Bad(); +} diff -ur --new-file a/src/hotspot/cpu/sparc/vmreg_sparc.hpp b/src/hotspot/cpu/sparc/vmreg_sparc.hpp --- a/src/hotspot/cpu/sparc/vmreg_sparc.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/vmreg_sparc.hpp 2023-04-16 11:42:11.080042627 +0000 @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_VMREG_SPARC_HPP +#define CPU_SPARC_VMREG_SPARC_HPP + +inline bool is_Register() { return value() >= 0 && value() < ConcreteRegisterImpl::max_gpr; } +inline bool is_FloatRegister() { return value() >= ConcreteRegisterImpl::max_gpr && + value() < ConcreteRegisterImpl::max_fpr; } +inline Register as_Register() { + assert( is_Register() && is_even(value()), "even-aligned GPR name" ); + // Yuk: a GPR owns two consecutive VMReg values, so halve the (even) value to get its register number + return ::as_Register(value()>>1); +} + +inline FloatRegister as_FloatRegister() { + assert( is_FloatRegister(), "must be" ); + // Yuk: FPR values start at max_gpr, so the offset from max_gpr is the FloatRegister number + return ::as_FloatRegister( value() - ConcreteRegisterImpl::max_gpr ); +} + +inline bool is_concrete() { + assert(is_reg(), "must be"); + int v = value(); + if ( v < ConcreteRegisterImpl::max_gpr ) { + return is_even(v); + } + // F0..F31: every value in this range is concrete + if ( v <= ConcreteRegisterImpl::max_gpr + 31) return true; + if ( v < ConcreteRegisterImpl::max_fpr) { /* above F31 only even values are concrete */ + return is_even(v); + } + assert(false, "what register?"); + return false; +} + +#endif // CPU_SPARC_VMREG_SPARC_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/vmreg_sparc.inline.hpp b/src/hotspot/cpu/sparc/vmreg_sparc.inline.hpp --- a/src/hotspot/cpu/sparc/vmreg_sparc.inline.hpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/vmreg_sparc.inline.hpp 2023-04-16 11:42:11.080144151 +0000 @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_VMREG_SPARC_INLINE_HPP +#define CPU_SPARC_VMREG_SPARC_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { /* GPR -> VMReg: noreg maps to Bad(), any other register to the value encoding() * 2. */ + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { /* FPR -> VMReg: values start at max_gpr and are offset by encoding(). */ return VMRegImpl::as_VMReg( ConcreteRegisterImpl::max_gpr + encoding() ); } + + + +#endif // CPU_SPARC_VMREG_SPARC_INLINE_HPP diff -ur --new-file a/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp b/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp --- a/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp 1970-01-01 01:00:00.000000000 +0000 +++ b/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp 2023-04-16 11:42:11.080348564 +0000 @@ -0,0 +1,268 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_sparc.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_sparc.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// machine-dependent part of VtableStubs: create vtableStub of correct size and +// initialize its code + +#define __ masm-> + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index); +#endif + + +// Used by compiler only; may use only caller saved, non-argument registers +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int index_dependent_slop = ((vtable_index < 512) ? 
2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?). + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G5, G3_scratch); + } +#endif // !PRODUCT && COMPILER2 + + assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0"); + + // get receiver klass + address npe_addr = __ pc(); + __ load_klass(O0, G3_scratch); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ ld(G3_scratch, in_bytes(Klass::vtable_length_offset()), G5); + __ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L); + + // set generates 8 instructions (worst case), 1 instruction (best case) + start_pc = __ pc(); + __ set(vtable_index, O2); + slop_delta = __ worst_case_insts_for_set()*BytesPerInstWord - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + + // there is no variance in call_VM() emitted code. 
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2); + __ bind(L); + } +#endif + + // set Method* (in case of interpreted method), and destination address + start_pc = __ pc(); + __ lookup_virtual_method(G3_scratch, vtable_index, G5_method); + // lookup_virtual_method generates 3 instructions (worst case), 1 instruction (best case) + slop_delta = 3*BytesPerInstWord - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + __ br_notnull_short(G5_method, Assembler::pt, L); + __ stop("Vtable entry is ZERO"); + __ bind(L); + } +#endif + + address ame_addr = __ pc(); // if the vtable entry is null, the method is abstract + // NOTE: for vtable dispatches, the vtable entry will never be null. + + __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3_scratch); + + // jump to target (either compiled code or c2iadapter) + __ JMP(G3_scratch, 0); + // delay slot is a nop: G5_method already holds the Method* (in case we call c2iadapter) + __ delayed()->nop(); + + masm->flush(); + slop_bytes += index_dependent_slop; // add'l slop for size variance due to large vtable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); + + return s; +} + + +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. 
+ address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int index_dependent_slop = ((itable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?). + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { +// Use G3_scratch, G4_scratch as work regs for inc_counter. +// These are defined before use further down. + __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G3_scratch, G4_scratch); + } +#endif // !PRODUCT && COMPILER2 + + Register G3_Klass = G3_scratch; + Register G5_icholder = G5; // Passed in as an argument + Register G4_interface = G4_scratch; + + // Entry arguments: + // G5_icholder: CompiledICHolder (holder klass and metadata are loaded from it below) + // O0: Receiver + assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0"); + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(O0, G3_Klass); + + // Push a new window to get some temp registers. This chops the head of all + // my 64-bit %o registers in the LION build, but this is OK because no longs + // are passed in the %o registers. Instead, longs are passed in G1 and G4 + // and so those registers are not available here. + __ save(SP,-frame::register_save_words*wordSize,SP); + + Label L_no_such_interface; + Register L5_method = L5; + + start_pc = __ pc(); + + // Receiver subtype check against REFC. + __ ld_ptr(G5_icholder, CompiledICHolder::holder_klass_offset(), G4_interface); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + G3_Klass, G4_interface, itable_index, + // outputs: scan temp. reg1, scan temp. 
reg2 + L5_method, L2, L3, + L_no_such_interface, + /*return_method=*/ false); + + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + + // Get Method* and entrypoint for compiler + __ ld_ptr(G5_icholder, CompiledICHolder::holder_metadata_offset(), G4_interface); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + G3_Klass, G4_interface, itable_index, + // outputs: method, scan temp. reg + L5_method, L2, L3, + L_no_such_interface); + + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // Reduce "estimate" such that "padding" does not drop below 8. + // Do not target a left-over number of zero, because a very + // large vtable or itable offset (> 4K) will require an extra + // sethi/or pair of instructions. + // Found typecheck(60) + lookup(72) to exceed previous estimate (32*4). + const ptrdiff_t estimate = 36*BytesPerInstWord; + const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + +#ifndef PRODUCT + if (DebugVtables) { + Label L01; + __ br_notnull_short(L5_method, Assembler::pt, L01); + __ stop("Method* is null"); + __ bind(L01); + } +#endif + + // If the following load is through a NULL pointer, we'll take an OS + // exception that should translate into an AbstractMethodError. We need the + // window count to be correct at that time. + __ restore(L5_method, 0, G5_method); + // Restore registers *before* the AME point. 
+ + address ame_addr = __ pc(); // if the vtable entry is null, the method is abstract + __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3_scratch); + + // G5_method: Method* + // O0: Receiver + // G3_scratch: entry point + __ JMP(G3_scratch, 0); + __ delayed()->nop(); + + __ bind(L_no_such_interface); + // Handle IncompatibleClassChangeError in itable stubs. + // More detailed error message. + // We force resolving of the call site by jumping to the "handle + // wrong method" stub, and so let the interpreter runtime do all the + // dirty work. + AddressLiteral icce(SharedRuntime::get_handle_wrong_method_stub()); + __ jump_to(icce, G3_scratch); + __ delayed()->restore(); + + masm->flush(); + slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop); + + return s; +} + +int VtableStub::pd_code_alignment() { + // UltraSPARC cache line size is 8 instructions: + const unsigned int icache_line_size = 32; + return icache_line_size; +} diff -ur -N a/src/hotspot/cpu/sparc/gc/g1/g1Globals_sparc.hpp b/src/hotspot/cpu/sparc/gc/g1/g1Globals_sparc.hpp --- a/src/hotspot/cpu/sparc/gc/g1/g1Globals_sparc.hpp 1970-01-01 01:00:00.000000000 +0100 +++ b/src/hotspot/cpu/sparc/gc/g1/g1Globals_sparc.hpp 2024-03-05 08:51:35.365083852 +0000 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef CPU_SPARC_GC_G1_G1GLOBALS_SPARC_HPP +#define CPU_SPARC_GC_G1_G1GLOBALS_SPARC_HPP + +const size_t G1MergeHeapRootsPrefetchCacheSize = 8; /* SPARC-specific value (NOTE(review): meaning and units are defined by the shared G1 code that consumes it - confirm there) */ + +#endif // CPU_SPARC_GC_G1_G1GLOBALS_SPARC_HPP diff -ur -N a/src/hotspot/cpu/sparc/matcher_sparc.hpp b/src/hotspot/cpu/sparc/matcher_sparc.hpp --- a/src/hotspot/cpu/sparc/matcher_sparc.hpp 1970-01-01 01:00:00.000000000 +0100 +++ b/src/hotspot/cpu/sparc/matcher_sparc.hpp 2024-03-05 09:21:09.882520838 +0000 @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_MATCHER_SPARC_HPP +#define CPU_SPARC_MATCHER_SPARC_HPP + + // Defined within class Matcher + + + // Should correspond to setting above (NOTE(review): the setting meant is not in this header - confirm the referent) + static const bool init_array_count_is_in_bytes = true; + + // Whether this platform implements the scalable vector feature + static const bool implements_scalable_vector = false; + + static constexpr bool supports_scalable_vector() { + return false; + } + + // SPARC doesn't support misaligned vector store/load. + static constexpr bool misaligned_vectors_ok() { + return false; + } + + // Whether code generation needs accurate ConvI2L types. + static const bool convi2l_type_required = true; + + // Do the processor's shift instructions only use the low 5/6 bits + // of the count for 32/64 bit ints? If not we need to do the masking + // ourselves. + static const bool need_masked_shift_count = false; + + // Does the CPU require late expand (see block.cpp for description of late expand)? + static const bool require_postalloc_expand = false; + + // No support for generic vector operands. + static const bool supports_generic_vector_operands = false; + + static constexpr bool isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)? + // Depends on optimizations in MacroAssembler::setx. + int hi = (int)(value >> 32); // upper word (NOTE(review): assumes jlong is 64 bits wide - confirm) + int lo = (int)(value & ~0); // ~0 is all ones, so the mask changes nothing; the (int) cast is what narrows to the lower word + return (hi == 0) || (hi == -1) || (lo == 0); // upper word all zeros or all ones, or lower word zero + } + + // No additional cost for CMOVL. + static constexpr int long_cmove_cost() { return 0; } + + // CMOVF/CMOVD are expensive on e.g., T4 and SPARC64. + static int float_cmove_cost() { + return VM_Version::has_fast_cmove() ? 0 : ConditionalMoveLimit; + } + + // Set this as clone_shift_expressions.
+ static bool narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return false; + } + + static bool narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); + return false; + } + + static bool const_oop_prefer_decode() { + // TODO: Check if loading ConP from TOC in heap-based mode is better (NOTE(review): "TOC" is not mentioned anywhere else in this SPARC header - wording may be carried over from another port; confirm): + // Prefer ConN+DecodeN over ConP in simple compressed oops mode. + // return CompressedOops::base() == NULL; (disabled alternative; the function currently always returns true) + return true; + } + + static bool const_klass_prefer_decode() { + // TODO: Check if loading ConP from TOC in heap-based mode is better (same caveat about "TOC" as for const_oop_prefer_decode): + // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. + // return CompressedKlassPointers::base() == NULL; (disabled alternative; the function currently always returns true) + return true; + } + + // Is it better to copy float constants, or load them directly from memory? + // Most RISCs will have to materialize an address into a + // register first, so they would do better to copy the constant from stack. + static const bool rematerialize_float_constants = false; + + // If the CPU can load and store misaligned doubles directly then no fixup is + // needed. Else we split the double into 2 integer pieces and move it + // piece-by-piece. Only happens when passing doubles into C code as the + // Java calling convention forces doubles to be aligned. + static const bool misaligned_doubles_ok = true; + + // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. + static const bool strict_fp_requires_explicit_rounding = false; + + // Do floats take an entire double register or just half? + // SPARC does not handle callee-save floats. + static constexpr bool float_in_double() { return false; } + + // Do ints take an entire long register or just half? + // The relevant question is how the int is callee-saved: + // the whole long is written but de-opt'ing will have to extract + // the relevant 32 bits.
+ static const bool int_in_long = true; + + // Does the CPU support vector variable shift instructions? + static constexpr bool supports_vector_variable_shifts(void) { + return false; + } + + // Does the CPU support vector variable rotate instructions? + static constexpr bool supports_vector_variable_rotates(void) { + return false; + } + + // Does the CPU support vector unsigned comparison instructions? + static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { + return false; + } + + // Some microarchitectures have mask registers used on vectors + static constexpr bool has_predicated_vectors(void) { + return false; + } + + // true means we have fast l2f conversion + // false means that conversion is done by runtime call + // NOTE: All currently supported SPARC HW provides fast conversion. + static constexpr bool convL2FSupported(void) { + return true; + } + + // Implements a variant of EncodeISOArrayNode that encodes ASCII only + static const bool supports_encode_ascii_array = false; + +#endif // CPU_SPARC_MATCHER_SPARC_HPP