!26 Add several enhancement patches & Remove fast-serializer-jdk8.patch

Merge pull request !26 from jdkboy/master
This commit is contained in:
openeuler-ci-bot 2020-09-02 09:08:48 +08:00 committed by Gitee
commit d8648983fa
10 changed files with 2623 additions and 709 deletions

View File

@ -0,0 +1,87 @@
diff --git a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
index 5f414c408..ce27f0df4 100644
--- a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
+++ b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1996, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,21 +41,8 @@ final class RandomCookie {
byte random_bytes[]; // exactly 32 bytes
RandomCookie(SecureRandom generator) {
- long temp = System.currentTimeMillis() / 1000;
- int gmt_unix_time;
- if (temp < Integer.MAX_VALUE) {
- gmt_unix_time = (int) temp;
- } else {
- gmt_unix_time = Integer.MAX_VALUE; // Whoops!
- }
-
random_bytes = new byte[32];
generator.nextBytes(random_bytes);
-
- random_bytes[0] = (byte)(gmt_unix_time >> 24);
- random_bytes[1] = (byte)(gmt_unix_time >> 16);
- random_bytes[2] = (byte)(gmt_unix_time >> 8);
- random_bytes[3] = (byte)gmt_unix_time;
}
RandomCookie(HandshakeInStream m) throws IOException {
@@ -68,22 +55,15 @@ final class RandomCookie {
}
void print(PrintStream s) {
- int i, gmt_unix_time;
-
- gmt_unix_time = random_bytes[0] << 24;
- gmt_unix_time += random_bytes[1] << 16;
- gmt_unix_time += random_bytes[2] << 8;
- gmt_unix_time += random_bytes[3];
-
- s.print("GMT: " + gmt_unix_time + " ");
- s.print("bytes = { ");
-
- for (i = 4; i < 32; i++) {
- if (i != 4) {
- s.print(", ");
+ s.print("random_bytes = {");
+ for (int i = 0; i < 32; i++) {
+ int k = random_bytes[i] & 0xFF;
+ if (i != 0) {
+ s.print(' ');
}
- s.print(random_bytes[i] & 0x0ff);
+ s.print(Utilities.hexDigits[k >>> 4]);
+ s.print(Utilities.hexDigits[k & 0xf]);
}
- s.println(" }");
+ s.println("}");
}
}
diff --git a/jdk/src/share/classes/sun/security/ssl/Utilities.java b/jdk/src/share/classes/sun/security/ssl/Utilities.java
index aefb02c9a..9b267f6e1 100644
--- a/jdk/src/share/classes/sun/security/ssl/Utilities.java
+++ b/jdk/src/share/classes/sun/security/ssl/Utilities.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,11 @@ import sun.net.util.IPAddressUtil;
* A utility class to share the static methods.
*/
final class Utilities {
+ /**
+ * hex digits
+ */
+ static final char[] hexDigits = "0123456789ABCDEF".toCharArray();
+
/**
* Puts {@code hostname} into the {@code serverNames} list.
* <P>

View File

@ -0,0 +1,28 @@
diff --git a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
index 65a441240..1e534d3da 100644
--- a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
@@ -71,10 +71,20 @@ bool frame::safe_for_sender(JavaThread *thread) {
return false;
}
- // unextended sp must be within the stack and above or equal sp
- bool unextended_sp_safe = (unextended_sp < thread->stack_base()) &&
- (unextended_sp >= sp);
+ // When we are running interpreted code the machine stack pointer, SP, is
+ // set low enough so that the Java expression stack can grow and shrink
+ // without ever exceeding the machine stack bounds. So, ESP >= SP.
+ // When we call out of an interpreted method, SP is incremented so that
+ // the space between SP and ESP is removed. The SP saved in the callee's
+ // frame is the SP *before* this increment. So, when we walk a stack of
+ // interpreter frames the sender's SP saved in a frame might be less than
+ // the SP at the point of call.
+
+ // So unextended sp must be within the stack but we need not to check
+ // that unextended sp >= sp
+
+ bool unextended_sp_safe = (unextended_sp < thread->stack_base());
if (!unextended_sp_safe) {
return false;
}

View File

@ -0,0 +1,558 @@
diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp
index de6d443cd..abbd4449f 100644
--- a/hotspot/src/share/vm/opto/chaitin.hpp
+++ b/hotspot/src/share/vm/opto/chaitin.hpp
@@ -111,9 +111,9 @@ public:
_msize_valid=1;
if (_is_vector) {
assert(!_fat_proj, "sanity");
- _mask.verify_sets(_num_regs);
+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
} else if (_num_regs == 2 && !_fat_proj) {
- _mask.verify_pairs();
+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
}
#endif
}
diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp
index 352ccfb9d..d92f09eb6 100644
--- a/hotspot/src/share/vm/opto/regmask.cpp
+++ b/hotspot/src/share/vm/opto/regmask.cpp
@@ -74,7 +74,8 @@ int find_lowest_bit( uint32 mask ) {
}
// Find highest 1, or return 32 if empty
-int find_hihghest_bit( uint32 mask ) {
+int find_highest_bit( uint32 mask ) {
+ assert(mask != 0, "precondition");
int n = 0;
if( mask > 0xffff ) {
mask >>= 16;
@@ -167,13 +168,14 @@ OptoReg::Name RegMask::find_first_pair() const {
//------------------------------ClearToPairs-----------------------------------
// Clear out partial bits; leave only bit pairs
void RegMask::clear_to_pairs() {
- for( int i = 0; i < RM_SIZE; i++ ) {
+ assert(valid_watermarks(), "sanity");
+ for( int i = _lwm; i < _hwm; i++ ) {
int bits = _A[i];
bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
bits |= (bits>>1); // Smear 1 hi-bit into a pair
_A[i] = bits;
}
- verify_pairs();
+ assert(is_aligned_pairs(), "mask is not aligned, adjacent pairs");
}
//------------------------------SmearToPairs-----------------------------------
@@ -188,10 +190,14 @@ void RegMask::smear_to_pairs() {
verify_pairs();
}
-//------------------------------is_aligned_pairs-------------------------------
+bool RegMask::is_misaligned_pair() const {
+ return Size() == 2 && !is_aligned_pairs();
+}
+
bool RegMask::is_aligned_pairs() const {
// Assert that the register mask contains only bit pairs.
- for( int i = 0; i < RM_SIZE; i++ ) {
+ assert(valid_watermarks(), "sanity");
+ for( int i = _lwm; i < _hwm; i++ ) {
int bits = _A[i];
while( bits ) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
@@ -206,39 +212,28 @@ bool RegMask::is_aligned_pairs() const {
return true;
}
-//------------------------------is_bound1--------------------------------------
-// Return TRUE if the mask contains a single bit
-int RegMask::is_bound1() const {
- if( is_AllStack() ) return false;
- int bit = -1; // Set to hold the one bit allowed
- for( int i = 0; i < RM_SIZE; i++ ) {
- if( _A[i] ) { // Found some bits
- if( bit != -1 ) return false; // Already had bits, so fail
- bit = _A[i] & -_A[i]; // Extract 1 bit from mask
- if( bit != _A[i] ) return false; // Found many bits, so fail
- }
- }
- // True for both the empty mask and for a single bit
- return true;
+bool RegMask::is_bound1() const {
+ if (is_AllStack()) return false;
+ return Size() == 1;
}
//------------------------------is_bound2--------------------------------------
// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
-int RegMask::is_bound_pair() const {
+bool RegMask::is_bound_pair() const {
if( is_AllStack() ) return false;
-
+ assert(valid_watermarks(), "sanity");
int bit = -1; // Set to hold the one bit allowed
- for( int i = 0; i < RM_SIZE; i++ ) {
- if( _A[i] ) { // Found some bits
- if( bit != -1 ) return false; // Already had bits, so fail
- bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
- if( (bit << 1) != 0 ) { // Bit pair stays in same word?
+ for( int i = _lwm; i <= _hwm; i++ ) {
+ if( _A[i] ) { // Found some bits
+ if( bit != -1) return false; // Already had bits, so fail
+ bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
+ if( (bit << 1) != 0 ) { // Bit pair stays in same word?
if( (bit | (bit<<1)) != _A[i] )
- return false; // Require adjacent bit pair and no more bits
- } else { // Else its a split-pair case
+ return false; // Require adjacent bit pair and no more bits
+ } else { // Else its a split-pair case
if( bit != _A[i] ) return false; // Found many bits, so fail
- i++; // Skip iteration forward
- if( i >= RM_SIZE || _A[i] != 1 )
+ i++; // Skip iteration forward
+ if( i > _hwm || _A[i] != 1 )
return false; // Require 1 lo bit in next word
}
}
@@ -247,31 +242,44 @@ int RegMask::is_bound_pair() const {
return true;
}
+// Test for a single adjacent set of ideal register's size.
+bool RegMask::is_bound(uint ireg) const {
+ if (is_vector(ireg)) {
+ if (is_bound_set(num_registers(ireg)))
+ return true;
+ } else if (is_bound1() || is_bound_pair()) {
+ return true;
+ }
+ return false;
+}
+
+
+
static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
-//------------------------------find_first_set---------------------------------
+
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Works also for size 1.
OptoReg::Name RegMask::find_first_set(const int size) const {
- verify_sets(size);
- for (int i = 0; i < RM_SIZE; i++) {
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+ assert(valid_watermarks(), "sanity");
+ for (int i = _lwm; i <= _hwm; i++) {
if (_A[i]) { // Found some bits
- int bit = _A[i] & -_A[i]; // Extract low bit
// Convert to bit number, return hi bit in pair
- return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
+ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(_A[i])+(size-1));
}
}
return OptoReg::Bad;
}
-//------------------------------clear_to_sets----------------------------------
// Clear out partial bits; leave only aligned adjacent bit pairs
void RegMask::clear_to_sets(const int size) {
if (size == 1) return;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
+ assert(valid_watermarks(), "sanity");
int low_bits_mask = low_bits[size>>2];
- for (int i = 0; i < RM_SIZE; i++) {
+ for (int i = _lwm; i <= _hwm; i++) {
int bits = _A[i];
int sets = (bits & low_bits_mask);
for (int j = 1; j < size; j++) {
@@ -286,17 +294,17 @@ void RegMask::clear_to_sets(const int size) {
}
_A[i] = sets;
}
- verify_sets(size);
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
}
-//------------------------------smear_to_sets----------------------------------
// Smear out partial bits to aligned adjacent bit sets
void RegMask::smear_to_sets(const int size) {
if (size == 1) return;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
+ assert(valid_watermarks(), "sanity");
int low_bits_mask = low_bits[size>>2];
- for (int i = 0; i < RM_SIZE; i++) {
+ for (int i = _lwm; i <= _hwm; i++) {
int bits = _A[i];
int sets = 0;
for (int j = 0; j < size; j++) {
@@ -312,17 +320,17 @@ void RegMask::smear_to_sets(const int size) {
}
_A[i] = sets;
}
- verify_sets(size);
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
}
-//------------------------------is_aligned_set--------------------------------
+// Assert that the register mask contains only bit sets.
bool RegMask::is_aligned_sets(const int size) const {
if (size == 1) return true;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
int low_bits_mask = low_bits[size>>2];
- // Assert that the register mask contains only bit sets.
- for (int i = 0; i < RM_SIZE; i++) {
+ assert(valid_watermarks(), "sanity");
+ for (int i = _lwm; i <= _hwm; i++) {
int bits = _A[i];
while (bits) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
@@ -339,14 +347,14 @@ bool RegMask::is_aligned_sets(const int size) const {
return true;
}
-//------------------------------is_bound_set-----------------------------------
// Return TRUE if the mask contains one adjacent set of bits and no other bits.
// Works also for size 1.
int RegMask::is_bound_set(const int size) const {
if( is_AllStack() ) return false;
assert(1 <= size && size <= 8, "update low bits table");
+ assert(valid_watermarks(), "sanity");
int bit = -1; // Set to hold the one bit allowed
- for (int i = 0; i < RM_SIZE; i++) {
+ for (int i = _lwm; i <= _hwm; i++) {
if (_A[i] ) { // Found some bits
if (bit != -1)
return false; // Already had bits, so fail
@@ -364,7 +372,7 @@ int RegMask::is_bound_set(const int size) const {
int set = bit>>24;
set = set & -set; // Remove sign extension.
set = (((set << size) - 1) >> 8);
- if (i >= RM_SIZE || _A[i] != set)
+ if (i > _hwm || _A[i] != set)
return false; // Require expected low bits in next word
}
}
@@ -373,7 +381,6 @@ int RegMask::is_bound_set(const int size) const {
return true;
}
-//------------------------------is_UP------------------------------------------
// UP means register only, Register plus stack, or stack only is DOWN
bool RegMask::is_UP() const {
// Quick common case check for DOWN (any stack slot is legal)
@@ -386,22 +393,22 @@ bool RegMask::is_UP() const {
return true;
}
-//------------------------------Size-------------------------------------------
// Compute size of register mask in bits
uint RegMask::Size() const {
extern uint8 bitsInByte[256];
uint sum = 0;
- for( int i = 0; i < RM_SIZE; i++ )
+ assert(valid_watermarks(), "sanity");
+ for( int i = _lwm; i <= _hwm; i++ ) {
sum +=
bitsInByte[(_A[i]>>24) & 0xff] +
bitsInByte[(_A[i]>>16) & 0xff] +
bitsInByte[(_A[i]>> 8) & 0xff] +
bitsInByte[ _A[i] & 0xff];
+ }
return sum;
}
#ifndef PRODUCT
-//------------------------------print------------------------------------------
void RegMask::dump(outputStream *st) const {
st->print("[");
RegMask rm = *this; // Structure copy into local temp
diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp
index 5ceebb3fb..6cef16ad7 100644
--- a/hotspot/src/share/vm/opto/regmask.hpp
+++ b/hotspot/src/share/vm/opto/regmask.hpp
@@ -44,27 +44,12 @@
# include "adfiles/adGlobals_ppc_64.hpp"
#endif
-// Some fun naming (textual) substitutions:
-//
-// RegMask::get_low_elem() ==> RegMask::find_first_elem()
-// RegMask::Special ==> RegMask::Empty
-// RegMask::_flags ==> RegMask::is_AllStack()
-// RegMask::operator<<=() ==> RegMask::Insert()
-// RegMask::operator>>=() ==> RegMask::Remove()
-// RegMask::Union() ==> RegMask::OR
-// RegMask::Inter() ==> RegMask::AND
-//
-// OptoRegister::RegName ==> OptoReg::Name
-//
-// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version
-//
-// numregs in chaitin ==> proper degree in chaitin
//-------------Non-zero bit search methods used by RegMask---------------------
// Find lowest 1, or return 32 if empty
int find_lowest_bit( uint32 mask );
// Find highest 1, or return 32 if empty
-int find_hihghest_bit( uint32 mask );
+int find_highest_bit( uint32 mask );
//------------------------------RegMask----------------------------------------
// The ADL file describes how to print the machine-specific registers, as well
@@ -97,6 +82,12 @@ class RegMask VALUE_OBJ_CLASS_SPEC {
public:
enum { CHUNK_SIZE = RM_SIZE*_WordBits };
+ // The low and high water marks represents the lowest and highest word
+ // that might contain set register mask bits, respectively. We guarantee
+ // that there are no bits in words outside this range, but any word at
+ // and between the two marks can still be 0.
+ int _lwm;
+ int _hwm;
// SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
// Also, consider the maximum alignment size for a normally allocated
@@ -126,13 +117,21 @@ public:
# define BODY(I) _A[I] = a##I;
FORALL_BODY
# undef BODY
+ _lwm = 0;
+ _hwm = RM_SIZE - 1;
+ while (_hwm > 0 && _A[_hwm] == 0) _hwm--;
+ while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++;
+ assert(valid_watermarks(), "post-condition");
}
// Handy copying constructor
RegMask( RegMask *rm ) {
-# define BODY(I) _A[I] = rm->_A[I];
- FORALL_BODY
-# undef BODY
+ _hwm = rm->_hwm;
+ _lwm = rm->_lwm;
+ for (int i = 0; i < RM_SIZE; i++) {
+ _A[i] = rm->_A[i];
+ }
+ assert(valid_watermarks(), "post-condition");
}
// Construct an empty mask
@@ -162,30 +161,36 @@ public:
// Test for being a not-empty mask.
int is_NotEmpty( ) const {
+ assert(valid_watermarks(), "sanity");
int tmp = 0;
-# define BODY(I) tmp |= _A[I];
- FORALL_BODY
-# undef BODY
+ for (int i = _lwm; i <= _hwm; i++) {
+ tmp |= _A[i];
+ }
return tmp;
}
// Find lowest-numbered register from mask, or BAD if mask is empty.
OptoReg::Name find_first_elem() const {
- int base, bits;
-# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
- FORALL_BODY
-# undef BODY
- { base = OptoReg::Bad; bits = 1<<0; }
- return OptoReg::Name(base + find_lowest_bit(bits));
+ assert(valid_watermarks(), "sanity");
+ for (int i = _lwm; i <= _hwm; i++) {
+ int bits = _A[i];
+ if (bits) {
+ return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits));
+ }
+ }
+ return OptoReg::Name(OptoReg::Bad);
}
+
// Get highest-numbered register from mask, or BAD if mask is empty.
OptoReg::Name find_last_elem() const {
- int base, bits;
-# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
- FORALL_BODY
-# undef BODY
- { base = OptoReg::Bad; bits = 1<<0; }
- return OptoReg::Name(base + find_hihghest_bit(bits));
+ assert(valid_watermarks(), "sanity");
+ for (int i = _hwm; i >= _lwm; i--) {
+ int bits = _A[i];
+ if (bits) {
+ return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits));
+ }
+ }
+ return OptoReg::Name(OptoReg::Bad);
}
// Find the lowest-numbered register pair in the mask. Return the
@@ -199,25 +204,34 @@ public:
void smear_to_pairs();
// Verify that the mask contains only aligned adjacent bit pairs
void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
+
+#ifdef ASSERT
+ // Verify watermarks are sane, i.e., within bounds and that no
+ // register words below or above the watermarks have bits set.
+ bool valid_watermarks() const {
+ assert(_hwm >= 0 && _hwm < RM_SIZE, err_msg("_hwm out of range: %d", _hwm));
+ assert(_lwm >= 0 && _lwm < RM_SIZE, err_msg("_lwm out of range: %d", _lwm));
+ for (int i = 0; i < _lwm; i++) {
+ assert(_A[i] == 0, err_msg("_lwm too high: %d regs at: %d", _lwm, i));
+ }
+ for (int i = _hwm + 1; i < RM_SIZE; i++) {
+ assert(_A[i] == 0, err_msg("_hwm too low: %d regs at: %d", _hwm, i));
+ }
+ return true;
+ }
+#endif // !ASSERT
+
// Test that the mask contains only aligned adjacent bit pairs
bool is_aligned_pairs() const;
// mask is a pair of misaligned registers
- bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
+ bool is_misaligned_pair() const;
// Test for single register
- int is_bound1() const;
+ bool is_bound1() const;
// Test for a single adjacent pair
- int is_bound_pair() const;
+ bool is_bound_pair() const;
// Test for a single adjacent set of ideal register's size.
- int is_bound(uint ireg) const {
- if (is_vector(ireg)) {
- if (is_bound_set(num_registers(ireg)))
- return true;
- } else if (is_bound1() || is_bound_pair()) {
- return true;
- }
- return false;
- }
+ bool is_bound(uint ireg) const;
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
@@ -228,8 +242,6 @@ public:
void clear_to_sets(const int size);
// Smear out partial bits to aligned adjacent bit sets.
void smear_to_sets(const int size);
- // Verify that the mask contains only aligned adjacent bit sets
- void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
// Test that the mask contains only aligned adjacent bit sets
bool is_aligned_sets(const int size) const;
@@ -244,11 +256,14 @@ public:
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
- return
-# define BODY(I) (_A[I] & rm._A[I]) |
- FORALL_BODY
-# undef BODY
- 0 ;
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+ int hwm = MIN2(_hwm, rm._hwm);
+ int lwm = MAX2(_lwm, rm._lwm);
+ int result = 0;
+ for (int i = lwm; i <= hwm; i++) {
+ result |= _A[i] & rm._A[i];
+ }
+ return result;
}
// Special test for register pressure based splitting
@@ -257,22 +272,29 @@ public:
// Clear a register mask
void Clear( ) {
-# define BODY(I) _A[I] = 0;
- FORALL_BODY
-# undef BODY
+ _lwm = RM_SIZE - 1;
+ _hwm = 0;
+ memset(_A, 0, sizeof(int)*RM_SIZE);
+ assert(valid_watermarks(), "sanity");
}
// Fill a register mask with 1's
void Set_All( ) {
-# define BODY(I) _A[I] = -1;
- FORALL_BODY
-# undef BODY
+ _lwm = 0;
+ _hwm = RM_SIZE - 1;
+ memset(_A, 0xFF, sizeof(int)*RM_SIZE);
+ assert(valid_watermarks(), "sanity");
}
// Insert register into mask
void Insert( OptoReg::Name reg ) {
- assert( reg < CHUNK_SIZE, "" );
- _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
+ assert(reg < CHUNK_SIZE, "sanity");
+ assert(valid_watermarks(), "pre-condition");
+ int index = reg>>_LogWordBits;
+ if (index > _hwm) _hwm = index;
+ if (index < _lwm) _lwm = index;
+ _A[index] |= (1<<(reg&(_WordBits-1)));
+ assert(valid_watermarks(), "post-condition");
}
// Remove register from mask
@@ -283,23 +305,38 @@ public:
// OR 'rm' into 'this'
void OR( const RegMask &rm ) {
-# define BODY(I) this->_A[I] |= rm._A[I];
- FORALL_BODY
-# undef BODY
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+ // OR widens the live range
+ if (_lwm > rm._lwm) _lwm = rm._lwm;
+ if (_hwm < rm._hwm) _hwm = rm._hwm;
+ for (int i = _lwm; i <= _hwm; i++) {
+ _A[i] |= rm._A[i];
+ }
+ assert(valid_watermarks(), "sanity");
}
// AND 'rm' into 'this'
void AND( const RegMask &rm ) {
-# define BODY(I) this->_A[I] &= rm._A[I];
- FORALL_BODY
-# undef BODY
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+ // Do not evaluate words outside the current watermark range, as they are
+ // already zero and an &= would not change that
+ for (int i = _lwm; i <= _hwm; i++) {
+ _A[i] &= rm._A[i];
+ }
+ // Narrow the watermarks if &rm spans a narrower range.
+ // Update after to ensure non-overlapping words are zeroed out.
+ if (_lwm < rm._lwm) _lwm = rm._lwm;
+ if (_hwm > rm._hwm) _hwm = rm._hwm;
}
// Subtract 'rm' from 'this'
void SUBTRACT( const RegMask &rm ) {
-# define BODY(I) _A[I] &= ~rm._A[I];
- FORALL_BODY
-# undef BODY
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+ int hwm = MIN2(_hwm, rm._hwm);
+ int lwm = MAX2(_lwm, rm._lwm);
+ for (int i = lwm; i <= hwm; i++) {
+ _A[i] &= ~rm._A[i];
+ }
}
// Compute size of register mask: number of bits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,479 @@
diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
index 1e9b1cb91..c0fd37d05 100644
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -2061,6 +2061,14 @@ public:
ld_st(Vt, T, a, op1, op2); \
}
+ void ld1_d(FloatRegister Vt, int index, const Address &a) {
+ starti;
+ assert(index == 0 || index == 1, "Index must be 0 or 1 for Vx.2D");
+ f(0, 31), f(index & 1, 30);
+ f(0b001101110, 29, 21), rf(a.index(), 16), f(0b1000, 15, 12);
+ f(0b01, 11, 10), rf(a.base(), 5), rf(Vt, 0);
+ }
+
INSN1(ld1, 0b001100010, 0b0111);
INSN2(ld1, 0b001100010, 0b1010);
INSN3(ld1, 0b001100010, 0b0110);
@@ -2186,6 +2194,13 @@ public:
#undef INSN
+ void faddp_d(FloatRegister Vd, FloatRegister Vn) {
+ starti;
+ f(0b01, 31, 30), f(0b1111100, 29, 23), f(0b1, 22), f(0b11000, 21, 17);
+ f(0b0110110, 16, 10);
+ rf(Vn, 5), rf(Vd, 0);
+ }
+
#define INSN(NAME, opc) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
starti; \
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
index f2f85df60..873da580b 100644
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@@ -2853,6 +2853,124 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp,
eor(crc, crc, tmp);
}
+/**
+ * Multiply and summation of 1 double-precision floating number pairs(sparse)
+ */
+void MacroAssembler::f2j_ddot_s1(Register dx, Register incx,
+ Register dy, Register incy) {
+ const FloatRegister tmpx = v2;
+ const FloatRegister tmpy = v3;
+
+ ld1_d(tmpx, 0, Address(dx, incx));
+ ld1_d(tmpy, 0, Address(dy, incy));
+ fmaddd(v0, tmpx, tmpy, v0);
+}
+
+/**
+ * Multiply and summation of 1 double-precision floating number pairs(dense)
+ */
+void MacroAssembler::f2j_ddot_d1(Register dx, Register dy, int size) {
+ const FloatRegister tmpx = v2;
+ const FloatRegister tmpy = v3;
+
+ ldrd(tmpx, post(dx, size));
+ ldrd(tmpy, post(dy, size));
+ fmaddd(v0, tmpx, tmpy, v0);
+}
+
+/**
+ * Multiply and summation of 4 double-precision floating numbers
+ */
+void MacroAssembler::f2j_ddot_d4(Register dx, Register dy) {
+ ld1(v2, v3, T2D, post(dx, 32));
+ ld1(v4, v5, T2D, post(dy, 32));
+ fmul(v2, T2D, v2, v4);
+ fmul(v3, T2D, v3, v5);
+ fadd(v0, T2D, v0, v2);
+ fadd(v6, T2D, v6, v3);
+}
+
+/**
+ * @param n register containing the number of doubles in array
+ * @param dx register pointing to input array
+ * @param incx register containing step len for dx
+ * @param dy register pointing to another input array
+ * @param incy register containing step len for dy
+ * @param temp_reg register containing loop variable
+ */
+void MacroAssembler::f2j_ddot(Register n, Register dx, Register incx,
+ Register dy, Register incy, Register temp_reg) {
+ Label Ldot_EXIT, Ldot_S_BEGIN, Ldot_S1, Ldot_S10, Ldot_S4, Ldot_D_BEGIN,
+ Ldot_D1, Ldot_D10, Ldot_D4;
+
+ const int SZ = 8;
+
+ enter();
+ fmovd(v0, zr);
+ fmovd(v6, v0);
+
+ cmp(n, zr);
+ br(Assembler::LE, Ldot_EXIT);
+
+ cmp(incx, 1);
+ br(Assembler::NE, Ldot_S_BEGIN);
+ cmp(incy, 1);
+ br(Assembler::NE, Ldot_S_BEGIN);
+
+ BIND(Ldot_D_BEGIN);
+ asr(temp_reg, n, 2);
+ cmp(temp_reg, zr);
+ br(Assembler::LE, Ldot_D1);
+
+ BIND(Ldot_D4);
+ f2j_ddot_d4(dx, dy);
+ subs(temp_reg, temp_reg, 1);
+ br(Assembler::NE, Ldot_D4);
+
+ fadd(v0, T2D, v0, v6);
+ faddp_d(v0, v0);
+
+ BIND(Ldot_D1);
+ ands(temp_reg, n, 3);
+ br(Assembler::LE, Ldot_EXIT);
+
+ BIND(Ldot_D10);
+ f2j_ddot_d1(dx, dy, SZ);
+ subs(temp_reg, temp_reg, 1);
+ br(Assembler::NE, Ldot_D10);
+ leave();
+ ret(lr);
+
+ BIND(Ldot_S_BEGIN);
+ lsl(incx, incx, 3);
+ lsl(incy, incy, 3);
+
+ asr(temp_reg, n, 2);
+ cmp(temp_reg, zr);
+ br(Assembler::LE, Ldot_S1);
+
+ BIND(Ldot_S4);
+ f2j_ddot_s1(dx, incx, dy, incy);
+ f2j_ddot_s1(dx, incx, dy, incy);
+ f2j_ddot_s1(dx, incx, dy, incy);
+ f2j_ddot_s1(dx, incx, dy, incy);
+ subs(temp_reg, temp_reg, 1);
+ br(Assembler::NE, Ldot_S4);
+
+ BIND(Ldot_S1);
+ ands(temp_reg, n, 3);
+ br(Assembler::LE, Ldot_EXIT);
+
+ BIND(Ldot_S10);
+ f2j_ddot_s1(dx, incx, dy, incy);
+ subs(temp_reg, temp_reg, 1);
+ br(Assembler::NE, Ldot_S10);
+
+ BIND(Ldot_EXIT);
+ leave();
+ ret(lr);
+}
+
/**
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
index 388177589..1abc7e3b0 100644
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@@ -1180,6 +1180,9 @@ public:
Register table0, Register table1, Register table2, Register table3,
bool upper = false);
+ void f2j_ddot(Register n, Register dx, Register incx,
+ Register dy, Register incy, Register temp_reg);
+
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
Register tmp1);
@@ -1236,6 +1239,11 @@ private:
// Uses rscratch2 if the address is not directly reachable
Address spill_address(int size, int offset, Register tmp=rscratch2);
+private:
+ void f2j_ddot_s1(Register dx, Register incx, Register dy, Register incy);
+ void f2j_ddot_d1(Register dx, Register dy, int size);
+ void f2j_ddot_d4(Register dx, Register dy);
+
public:
void spill(Register Rx, bool is64, int offset) {
if (is64) {
diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
index 0d73c0c0c..337d5c1dd 100644
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
@@ -45,6 +45,7 @@
#include "stubRoutines_aarch64.hpp"
+
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
@@ -3220,6 +3221,39 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ /**
+ * Arguments:
+ *
+ * Inputs:
+ * c_rarg0 - int n
+ * c_rarg1 - double[] dx
+ * c_rarg2 - int incx
+ * c_rarg3 - double[] dy
+ * c_rarg4 - int incy
+ *
+ * Output:
+ * d0 - ddot result
+ *
+ */
+ address generate_ddotF2jBLAS() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "f2jblas_ddot");
+
+ address start = __ pc();
+
+ const Register n = c_rarg0;
+ const Register dx = c_rarg1;
+ const Register incx = c_rarg2;
+ const Register dy = c_rarg3;
+ const Register incy = c_rarg4;
+
+ BLOCK_COMMENT("Entry:");
+
+ __ f2j_ddot(n, dx, incx, dy, incy, rscratch2);
+
+ return start;
+ }
+
/**
* Arguments:
*
@@ -4262,6 +4296,10 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_montgomerySquare = g.generate_multiply();
}
+ if (UseF2jBLASIntrinsics) {
+ StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS();
+ }
+
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
index 148f9212e..6bd8dbedd 100644
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@@ -852,6 +852,12 @@
do_name( implCompress_name, "implCompress0") \
do_signature(implCompress_signature, "([BI)V") \
\
+ /* support for com.github.fommil.netlib.F2jBLAS */ \
+ do_class(com_github_fommil_netlib_f2jblas, "com/github/fommil/netlib/F2jBLAS") \
+ do_intrinsic(_f2jblas_ddot, com_github_fommil_netlib_f2jblas, ddot_name, ddot_signature, F_R) \
+ do_name( ddot_name, "ddot") \
+ do_signature(ddot_signature, "(I[DI[DI)D") \
+ \
/* support for sun.security.provider.SHA2 */ \
do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \
do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R) \
diff --git a/hotspot/src/share/vm/oops/method.cpp b/hotspot/src/share/vm/oops/method.cpp
index 24fae4d30..64cdae9c7 100644
--- a/hotspot/src/share/vm/oops/method.cpp
+++ b/hotspot/src/share/vm/oops/method.cpp
@@ -1281,7 +1281,9 @@ vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
// which does not use the class default class loader so we check for its loader here
InstanceKlass* ik = InstanceKlass::cast(holder);
if ((ik->class_loader() != NULL) && !SystemDictionary::is_ext_class_loader(ik->class_loader())) {
- return vmSymbols::NO_SID; // regardless of name, no intrinsics here
+ if (!EnableIntrinsicExternal) {
+ return vmSymbols::NO_SID; // regardless of name, no intrinsics here
+ }
}
// see if the klass name is well-known:
diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
index 9ef1c5e69..aa1b1ac3a 100644
--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
@@ -978,7 +978,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
- strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
+ strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0)
))) {
call->dump();
fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp
index 89ebabe6f..5cbc0f012 100644
--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@@ -335,6 +335,7 @@ class LibraryCallKit : public GraphKit {
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
+ bool inline_ddotF2jBLAS();
bool inline_profileBoolean();
};
@@ -587,6 +588,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
if (!UseCRC32Intrinsics) return NULL;
break;
+ case vmIntrinsics::_f2jblas_ddot:
+ if (!UseF2jBLASIntrinsics) return NULL;
+ break;
+
case vmIntrinsics::_incrementExactI:
case vmIntrinsics::_addExactI:
if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
@@ -983,6 +988,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_profileBoolean:
return inline_profileBoolean();
+ case vmIntrinsics::_f2jblas_ddot:
+ return inline_ddotF2jBLAS();
default:
// If you get here, it may be that someone has added a new intrinsic
@@ -6303,6 +6310,49 @@ bool LibraryCallKit::inline_updateBytesCRC32() {
return true;
}
+/**
+ * double com.github.fommil.netlib.F2jBLAS.ddot(int n, double[] dx, int incx, double[] dy, int incy)
+ */
+bool LibraryCallKit::inline_ddotF2jBLAS() {
+ assert(callee()->signature()->size() == 5, "update has 5 parameters");
+ Node* n = argument(1); // type: int
+ Node* dx = argument(2); // type: double[]
+ Node* incx = argument(3); // type: int
+ Node* dy = argument(4); // type: double[]
+ Node* incy = argument(5); // type: int
+
+ const Type* dx_type = dx->Value(&_gvn);
+ const Type* dy_type = dy->Value(&_gvn);
+ const TypeAryPtr* dx_top_src = dx_type->isa_aryptr();
+ const TypeAryPtr* dy_top_src = dy_type->isa_aryptr();
+ if (dx_top_src == NULL || dx_top_src->klass() == NULL ||
+ dy_top_src == NULL || dy_top_src->klass() == NULL) {
+ // failed array check
+ return false;
+ }
+
+ // Figure out the size and type of the elements we will be copying.
+ BasicType dx_elem = dx_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+ BasicType dy_elem = dy_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+ if (dx_elem != T_DOUBLE || dy_elem != T_DOUBLE) {
+ return false;
+ }
+
+ // 'dx_start' points to dx array + scaled offset
+ Node* dx_start = array_element_address(dx, intcon(0), dx_elem);
+ Node* dy_start = array_element_address(dy, intcon(0), dy_elem);
+
+ address stubAddr = StubRoutines::ddotF2jBLAS();
+ const char *stubName = "f2jblas_ddot";
+ Node* call;
+ call = make_runtime_call(RC_LEAF, OptoRuntime::ddotF2jBLAS_Type(),
+ stubAddr, stubName, TypePtr::BOTTOM,
+ n, dx_start, incx, dy_start, incy);
+ Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+ set_result(result);
+ return true;
+}
+
/**
* Calculate CRC32 for ByteBuffer.
* int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp
index ba8f42e49..f1fe4d666 100644
--- a/hotspot/src/share/vm/opto/runtime.cpp
+++ b/hotspot/src/share/vm/opto/runtime.cpp
@@ -920,6 +920,30 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
return TypeFunc::make(domain, range);
}
+/**
+ * double ddot(int n, double *dx, int incx, double *dy, int incy)
+ */
+const TypeFunc* OptoRuntime::ddotF2jBLAS_Type() {
+ // create input type (domain)
+ int num_args = 5;
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypeInt::INT; // n
+ fields[argp++] = TypeAryPtr::DOUBLES; // dx
+ fields[argp++] = TypeInt::INT; // incx
+ fields[argp++] = TypeAryPtr::DOUBLES; // dy
+ fields[argp++] = TypeInt::INT; // incy
+ assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
+
+ // result type needed
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms + 0] = Type::DOUBLE;
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
+ return TypeFunc::make(domain, range);
+}
+
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp
index e3bdfdf9c..66d393c5c 100644
--- a/hotspot/src/share/vm/opto/runtime.hpp
+++ b/hotspot/src/share/vm/opto/runtime.hpp
@@ -317,6 +317,8 @@ private:
static const TypeFunc* updateBytesCRC32_Type();
+ static const TypeFunc* ddotF2jBLAS_Type();
+
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp
index 7b17e623b..520cc3187 100644
--- a/hotspot/src/share/vm/runtime/globals.hpp
+++ b/hotspot/src/share/vm/runtime/globals.hpp
@@ -743,6 +743,12 @@ class CommandLineFlags {
product(bool, UseCRC32Intrinsics, false, \
"use intrinsics for java.util.zip.CRC32") \
\
+ experimental(bool, UseF2jBLASIntrinsics, false, \
+ "use intrinsics for com.github.fommil.netlib.F2jBLAS on aarch64") \
+ \
+ experimental(bool, EnableIntrinsicExternal, false, \
+ "enable intrinsics for methods of external packages") \
+ \
develop(bool, TraceCallFixup, false, \
"Trace all call fixups") \
\
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp
index d943248da..10f438bc5 100644
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp
@@ -136,6 +136,8 @@ address StubRoutines::_sha512_implCompressMB = NULL;
address StubRoutines::_updateBytesCRC32 = NULL;
address StubRoutines::_crc_table_adr = NULL;
+address StubRoutines::_ddotF2jBLAS = NULL;
+
address StubRoutines::_multiplyToLen = NULL;
address StubRoutines::_squareToLen = NULL;
address StubRoutines::_mulAdd = NULL;
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp
index e18b9127d..a4eeb910d 100644
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp
@@ -214,6 +214,8 @@ class StubRoutines: AllStatic {
static address _updateBytesCRC32;
static address _crc_table_adr;
+ static address _ddotF2jBLAS;
+
static address _multiplyToLen;
static address _squareToLen;
static address _mulAdd;
@@ -377,6 +379,8 @@ class StubRoutines: AllStatic {
static address updateBytesCRC32() { return _updateBytesCRC32; }
static address crc_table_addr() { return _crc_table_adr; }
+ static address ddotF2jBLAS() { return _ddotF2jBLAS; }
+
static address multiplyToLen() {return _multiplyToLen; }
static address squareToLen() {return _squareToLen; }
static address mulAdd() {return _mulAdd; }

View File

@ -1,679 +0,0 @@
commit 3ece3b6a87e4bf61a1f786c12d796012becce313
Author: hexuejin <hexuejin2@huawei.com>
Date: Thu May 28 10:30:20 2020 +0800
Add FastSerializer
DTS/AR: AR.SR.IREQ02369011.001.001
Summary:<core-libs>: Add FastSerializer
LLT: jtreg
Patch Type: huawei
Bug url: NA
diff --git a/hotspot/src/share/vm/prims/unsafe.cpp b/hotspot/src/share/vm/prims/unsafe.cpp
index cdb72c0d5..d50041635 100644
--- a/hotspot/src/share/vm/prims/unsafe.cpp
+++ b/hotspot/src/share/vm/prims/unsafe.cpp
@@ -1361,6 +1361,10 @@ UNSAFE_ENTRY(void, Unsafe_PrefetchWrite(JNIEnv* env, jclass ignored, jobject obj
Prefetch::write(addr, (intx)offset);
UNSAFE_END
+UNSAFE_ENTRY(jboolean, Unsafe_GetUseFastSerializer(JNIEnv *env, jobject unsafe)) {
+ return UseFastSerializer;
+}
+UNSAFE_END
/// JVM_RegisterUnsafeMethods
@@ -1447,7 +1451,8 @@ static JNINativeMethod methods_140[] = {
{CC "allocateInstance", CC "(" CLS ")" OBJ, FN_PTR(Unsafe_AllocateInstance)},
{CC "monitorEnter", CC "(" OBJ ")V", FN_PTR(Unsafe_MonitorEnter)},
{CC "monitorExit", CC "(" OBJ ")V", FN_PTR(Unsafe_MonitorExit)},
- {CC "throwException", CC "(" THR ")V", FN_PTR(Unsafe_ThrowException)}
+ {CC "throwException", CC "(" THR ")V", FN_PTR(Unsafe_ThrowException)},
+ {CC "getUseFastSerializer", CC "()Z", FN_PTR(Unsafe_GetUseFastSerializer)}
};
// These are the methods prior to the JSR 166 changes in 1.5.0
@@ -1493,8 +1498,8 @@ static JNINativeMethod methods_141[] = {
{CC "allocateInstance", CC "(" CLS ")" OBJ, FN_PTR(Unsafe_AllocateInstance)},
{CC "monitorEnter", CC "(" OBJ ")V", FN_PTR(Unsafe_MonitorEnter)},
{CC "monitorExit", CC "(" OBJ ")V", FN_PTR(Unsafe_MonitorExit)},
- {CC "throwException", CC "(" THR ")V", FN_PTR(Unsafe_ThrowException)}
-
+ {CC "throwException", CC "(" THR ")V", FN_PTR(Unsafe_ThrowException)},
+ {CC "getUseFastSerializer", CC "()Z", FN_PTR(Unsafe_GetUseFastSerializer)}
};
// These are the methods prior to the JSR 166 changes in 1.6.0
@@ -1548,7 +1553,8 @@ static JNINativeMethod methods_15[] = {
{CC "compareAndSwapInt", CC "(" OBJ "J""I""I"")Z", FN_PTR(Unsafe_CompareAndSwapInt)},
{CC "compareAndSwapLong", CC "(" OBJ "J""J""J"")Z", FN_PTR(Unsafe_CompareAndSwapLong)},
{CC "park", CC "(ZJ)V", FN_PTR(Unsafe_Park)},
- {CC "unpark", CC "(" OBJ ")V", FN_PTR(Unsafe_Unpark)}
+ {CC "unpark", CC "(" OBJ ")V", FN_PTR(Unsafe_Unpark)},
+ {CC "getUseFastSerializer", CC "()Z", FN_PTR(Unsafe_GetUseFastSerializer)}
};
@@ -1606,7 +1612,8 @@ static JNINativeMethod methods_16[] = {
{CC "putOrderedInt", CC "(" OBJ "JI)V", FN_PTR(Unsafe_SetOrderedInt)},
{CC "putOrderedLong", CC "(" OBJ "JJ)V", FN_PTR(Unsafe_SetOrderedLong)},
{CC "park", CC "(ZJ)V", FN_PTR(Unsafe_Park)},
- {CC "unpark", CC "(" OBJ ")V", FN_PTR(Unsafe_Unpark)}
+ {CC "unpark", CC "(" OBJ ")V", FN_PTR(Unsafe_Unpark)},
+ {CC "getUseFastSerializer", CC "()Z", FN_PTR(Unsafe_GetUseFastSerializer)}
};
// These are the methods for 1.8.0
@@ -1662,7 +1669,8 @@ static JNINativeMethod methods_18[] = {
{CC "putOrderedInt", CC "(" OBJ "JI)V", FN_PTR(Unsafe_SetOrderedInt)},
{CC "putOrderedLong", CC "(" OBJ "JJ)V", FN_PTR(Unsafe_SetOrderedLong)},
{CC "park", CC "(ZJ)V", FN_PTR(Unsafe_Park)},
- {CC "unpark", CC "(" OBJ ")V", FN_PTR(Unsafe_Unpark)}
+ {CC "unpark", CC "(" OBJ ")V", FN_PTR(Unsafe_Unpark)},
+ {CC "getUseFastSerializer", CC "()Z", FN_PTR(Unsafe_GetUseFastSerializer)}
};
JNINativeMethod loadavg_method[] = {
diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp
index 2e6ff26ed..0a6ebfae1 100644
--- a/hotspot/src/share/vm/runtime/globals.hpp
+++ b/hotspot/src/share/vm/runtime/globals.hpp
@@ -553,6 +553,10 @@ class CommandLineFlags {
"Enable normal processing of flags relating to experimental " \
"features") \
\
+ experimental(bool, UseFastSerializer, false, \
+ "Cache-based serialization.It is extremely fast, but it can only" \
+ "be effective in certain scenarios.") \
+ \
product(bool, JavaMonitorsInStackTrace, true, \
"Print information about Java monitor locks when the stacks are" \
"dumped") \
diff --git a/jdk/src/share/classes/java/io/ObjectInputStream.java b/jdk/src/share/classes/java/io/ObjectInputStream.java
index 5d30f2a01..b67f01719 100644
--- a/jdk/src/share/classes/java/io/ObjectInputStream.java
+++ b/jdk/src/share/classes/java/io/ObjectInputStream.java
@@ -49,6 +49,7 @@ import sun.misc.SharedSecrets;
import sun.misc.JavaOISAccess;
import sun.util.logging.PlatformLogger;
import sun.security.action.GetBooleanAction;
+import sun.misc.Unsafe;
/**
* An ObjectInputStream deserializes primitive data and objects previously
@@ -284,6 +285,22 @@ public class ObjectInputStream
traceLogger = (filterLog != null &&
filterLog.isLoggable(PlatformLogger.Level.FINER)) ? filterLog : null;
}
+
+ /*
+ * Logger for FastSerializer.
+ * Setup the FastSerializer logger if it is set to FINE
+ * (Assuming it will not change).
+ */
+ private static final PlatformLogger fastSerLogger;
+ static {
+ if (printFastSerializer) {
+ PlatformLogger fastSerLog = PlatformLogger.getLogger("fastSerializer");
+ fastSerLogger = (fastSerLog != null &&
+ fastSerLog.isLoggable(PlatformLogger.Level.FINE)) ? fastSerLog : null;
+ } else {
+ fastSerLogger = null;
+ }
+ }
}
/** filter stream for handling block data conversion */
@@ -312,6 +329,9 @@ public class ObjectInputStream
/** if true, invoke resolveObject() */
private boolean enableResolve;
+ /** Used to get the commandline option: useFastSerializer */
+ private static final Unsafe UNSAFE = Unsafe.getUnsafe();
+
/**
* Context during upcalls to class-defined readObject methods; holds
* object currently being deserialized and descriptor for current class.
@@ -325,6 +345,33 @@ public class ObjectInputStream
*/
private ObjectInputFilter serialFilter;
+ /**
+ * value of "useFastSerializer" property
+ */
+ private static final boolean defaultFastSerializer = UNSAFE.getUseFastSerializer();
+
+ /**
+ * true or false for open FastSerilizer
+ * May be changed in readStreamHeader
+ */
+ private boolean useFastSerializer = defaultFastSerializer;
+
+ /**
+ * Value of "fastSerializerEscapeMode" property. It can be turned on
+ * when useFastSerializer is true.
+ */
+ private static final boolean fastSerializerEscapeMode = java.security.AccessController.doPrivileged(
+ new sun.security.action.GetBooleanAction(
+ "fastSerializerEscapeMode")).booleanValue();
+
+ /**
+ * value of "printFastSerializer" property,
+ * as true or false for printing FastSerializer logs.
+ */
+ private static final boolean printFastSerializer = java.security.AccessController.doPrivileged(
+ new sun.security.action.GetBooleanAction(
+ "printFastSerializer")).booleanValue();
+
/**
* Creates an ObjectInputStream that reads from the specified InputStream.
* A serialization stream header is read from the stream and verified.
@@ -396,6 +443,9 @@ public class ObjectInputStream
* transitively so that a complete equivalent graph of objects is
* reconstructed by readObject.
*
+ * The difference between fastSerialzation and default serialization is the
+ * descriptor serialization. The data serialization is same with each other.
+ *
* <p>The root object is completely restored when all of its fields and the
* objects it references are completely restored. At this point the object
* validation callbacks are executed in order based on their registered
@@ -670,11 +720,20 @@ public class ObjectInputStream
vlist.register(obj, prio);
}
+ /**
+ * Cache the class meta during serialization.
+ * Only used in FastSerilizer.
+ */
+ protected static ConcurrentHashMap<String,Class<?>> nameToClass = new ConcurrentHashMap<>();
+
/**
* Load the local class equivalent of the specified stream class
* description. Subclasses may implement this method to allow classes to
* be fetched from an alternate source.
*
+ * When fastSerializer is turned on, fields of desc will be null except
+ * name. When resolveClass is override, this may cause null pointer exception.
+ *
* <p>The corresponding method in <code>ObjectOutputStream</code> is
* <code>annotateClass</code>. This method will be invoked only once for
* each unique class in the stream. This method can be implemented by
@@ -715,16 +774,27 @@ public class ObjectInputStream
throws IOException, ClassNotFoundException
{
String name = desc.getName();
+ Class<?> cl = null;
+
+ if (useFastSerializer) {
+ cl = nameToClass.get(name);
+ if (cl != null) {
+ return cl;
+ }
+ }
try {
- return Class.forName(name, false, latestUserDefinedLoader());
+ cl = Class.forName(name, false, latestUserDefinedLoader());
} catch (ClassNotFoundException ex) {
- Class<?> cl = primClasses.get(name);
- if (cl != null) {
- return cl;
- } else {
+ cl = primClasses.get(name);
+ if (cl == null) {
throw ex;
}
}
+ if (useFastSerializer) {
+ nameToClass.put(name, cl);
+ }
+
+ return cl;
}
/**
@@ -894,9 +964,34 @@ public class ObjectInputStream
{
short s0 = bin.readShort();
short s1 = bin.readShort();
- if (s0 != STREAM_MAGIC || s1 != STREAM_VERSION) {
- throw new StreamCorruptedException(
- String.format("invalid stream header: %04X%04X", s0, s1));
+ if (useFastSerializer) {
+ if (s0 != STREAM_MAGIC_FAST || s1 != STREAM_VERSION) {
+
+ if (s0 != STREAM_MAGIC) {
+ throw new StreamCorruptedException(
+ String.format("invalid stream header: %04X%04X, and FastSerializer is activated", s0, s1));
+ }
+
+ if (!fastSerializerEscapeMode) {
+ throw new StreamCorruptedException(
+ String.format("invalid stream header: %04X%04X.Fast serialization does not support " +
+ "original serialized files", s0, s1));
+ }
+
+ // Escape to default serialization
+ useFastSerializer = false;
+ if (Logging.fastSerLogger != null) {
+ Logging.fastSerLogger.fine("[Deserialize]: Escape and disable FastSerializer");
+ }
+ }
+ } else if (s0 != STREAM_MAGIC || s1 != STREAM_VERSION) {
+ if (s0 == STREAM_MAGIC_FAST && s1 == STREAM_VERSION) {
+ throw new StreamCorruptedException(
+ String.format("invalid stream header: %04X%04X, and it is a FastSerializer stream", s0, s1));
+ } else {
+ throw new StreamCorruptedException(
+ String.format("invalid stream header: %04X%04X", s0, s1));
+ }
}
}
@@ -910,6 +1005,11 @@ public class ObjectInputStream
* this method reads class descriptors according to the format defined in
* the Object Serialization specification.
*
+ * In fastSerialize mode, the descriptor is obtained by lookup method. And
+ * the resolveClass method is called here to get the classmeta. Since the
+ * descriptor is obtained by lookup, the descriptor is same as localdesc.
+ * So we cann't distinguish the receiver desc and local desc.
+ *
* @return the class descriptor read
* @throws IOException If an I/O error has occurred.
* @throws ClassNotFoundException If the Class of a serialized object used
@@ -920,6 +1020,27 @@ public class ObjectInputStream
protected ObjectStreamClass readClassDescriptor()
throws IOException, ClassNotFoundException
{
+ // fastSerializer
+ if (useFastSerializer) {
+ String name = readUTF();
+ Class<?> cl = null;
+ ObjectStreamClass desc = new ObjectStreamClass(name);
+ try {
+ // In order to match this method, we add an annotateClass method in
+ // writeClassDescriptor.
+ cl = resolveClass(desc);
+ } catch (ClassNotFoundException ex) {
+ // resolveClass is just used to obtain Class which required by lookup method
+ // and it will be called again later, so we don't throw ClassNotFoundException here.
+ return desc;
+ }
+ if (cl != null) {
+ desc = ObjectStreamClass.lookup(cl, true);
+ }
+ return desc;
+ }
+
+ // Default deserialization. If the Class cannot be found, throw ClassNotFoundException.
ObjectStreamClass desc = new ObjectStreamClass();
desc.readNonProxy(this);
return desc;
@@ -1935,17 +2056,40 @@ public class ObjectInputStream
skipCustomData();
- try {
- totalObjectRefs++;
- depth++;
- desc.initNonProxy(readDesc, cl, resolveEx, readClassDesc(false));
- } finally {
- depth--;
+ totalObjectRefs++;
+ depth++;
+
+ if (useFastSerializer) {
+ desc.initNonProxyFast(readDesc, resolveEx);
+ ObjectStreamClass superDesc = desc.getSuperDesc();
+ long originDepth = depth - 1;
+ // Since desc is obtained from the lookup method, we will lose the depth and
+ // totalObjectRefs of superDesc. So we add a loop here to compute the depth
+ // and objectRef of superDesc.
+ while (superDesc != null && superDesc.forClass() != null) {
+ filterCheck(superDesc.forClass(), -1);
+ superDesc = superDesc.getSuperDesc();
+ totalObjectRefs++;
+ depth++;
+ }
+ depth = originDepth;
+ } else {
+ try {
+ desc.initNonProxy(readDesc, cl, resolveEx, readClassDesc(false));
+ } finally {
+ depth--;
+ }
}
handles.finish(descHandle);
passHandle = descHandle;
+ if (Logging.fastSerLogger != null) {
+ Logging.fastSerLogger.fine(
+ "[Deserialize] useFastSerializer:{0}, Class name:{1}, SerialVersionUID:{2}, flags:{3}",
+ useFastSerializer, desc.getName(), desc.getSerialVersionUID(), desc.getFlags(this));
+ }
+
return desc;
}
@@ -2334,21 +2478,25 @@ public class ObjectInputStream
desc.setPrimFieldValues(obj, primVals);
}
- int objHandle = passHandle;
- ObjectStreamField[] fields = desc.getFields(false);
- Object[] objVals = new Object[desc.getNumObjFields()];
- int numPrimFields = fields.length - objVals.length;
- for (int i = 0; i < objVals.length; i++) {
- ObjectStreamField f = fields[numPrimFields + i];
- objVals[i] = readObject0(Object.class, f.isUnshared());
- if (f.getField() != null) {
- handles.markDependency(objHandle, passHandle);
+ Object[] objVals = null;
+ int numObjFields = desc.getNumObjFields();
+ if (numObjFields > 0) {
+ int objHandle = passHandle;
+ ObjectStreamField[] fields = desc.getFields(false);
+ objVals = new Object[numObjFields];
+ int numPrimFields = fields.length - objVals.length;
+ for (int i = 0; i < objVals.length; i++) {
+ ObjectStreamField f = fields[numPrimFields + i];
+ objVals[i] = readObject0(Object.class, f.isUnshared());
+ if (f.getField() != null) {
+ handles.markDependency(objHandle, passHandle);
+ }
}
+ if (obj != null) {
+ desc.setObjFieldValues(obj, objVals);
+ }
+ passHandle = objHandle;
}
- if (obj != null) {
- desc.setObjFieldValues(obj, objVals);
- }
- passHandle = objHandle;
}
/**
diff --git a/jdk/src/share/classes/java/io/ObjectOutputStream.java b/jdk/src/share/classes/java/io/ObjectOutputStream.java
index 6d29e3a1f..3890efc3e 100644
--- a/jdk/src/share/classes/java/io/ObjectOutputStream.java
+++ b/jdk/src/share/classes/java/io/ObjectOutputStream.java
@@ -37,6 +37,8 @@ import java.util.concurrent.ConcurrentMap;
import static java.io.ObjectStreamClass.processQueue;
import java.io.SerialCallbackContext;
import sun.reflect.misc.ReflectUtil;
+import sun.misc.Unsafe;
+import sun.util.logging.PlatformLogger;
/**
* An ObjectOutputStream writes primitive data types and graphs of Java objects
@@ -173,6 +175,24 @@ public class ObjectOutputStream
new ReferenceQueue<>();
}
+ private static class Logging {
+ /*
+ * Logger for FastSerializer.
+ * Setup the FastSerializer logger if it is set to FINE.
+ * (Assuming it will not change).
+ */
+ static final PlatformLogger fastSerLogger;
+ static {
+ if (printFastSerializer) {
+ PlatformLogger fastSerLog = PlatformLogger.getLogger("fastSerializer");
+ fastSerLogger = (fastSerLog != null &&
+ fastSerLog.isLoggable(PlatformLogger.Level.FINE)) ? fastSerLog : null;
+ } else {
+ fastSerLogger = null;
+ }
+ }
+ }
+
/** filter stream for handling block data conversion */
private final BlockDataOutputStream bout;
/** obj -> wire handle map */
@@ -214,6 +234,22 @@ public class ObjectOutputStream
new sun.security.action.GetBooleanAction(
"sun.io.serialization.extendedDebugInfo")).booleanValue();
+ private static final Unsafe UNSAFE = Unsafe.getUnsafe();
+
+ /**
+ * Value of "UseFastSerializer" property. The fastSerializer is turned
+ * on when it is true.
+ */
+ private static final boolean useFastSerializer = UNSAFE.getUseFastSerializer();
+
+ /**
+ * value of "printFastSerializer" property,
+ * as true or false for printing FastSerializer logs.
+ */
+ private static final boolean printFastSerializer = java.security.AccessController.doPrivileged(
+ new sun.security.action.GetBooleanAction(
+ "printFastSerializer")).booleanValue();
+
/**
* Creates an ObjectOutputStream that writes to the specified OutputStream.
* This constructor writes the serialization stream header to the
@@ -327,6 +363,9 @@ public class ObjectOutputStream
* object are written transitively so that a complete equivalent graph of
* objects can be reconstructed by an ObjectInputStream.
*
+ * The difference between fastSerialzation and default serialization is the
+ * descriptor serialization. The data serialization is same with each other.
+ *
* <p>Exceptions are thrown for problems with the OutputStream and for
* classes that should not be serialized. All exceptions are fatal to the
* OutputStream, which is left in an indeterminate state, and it is up to
@@ -633,7 +672,11 @@ public class ObjectOutputStream
* stream
*/
protected void writeStreamHeader() throws IOException {
- bout.writeShort(STREAM_MAGIC);
+ if (useFastSerializer) {
+ bout.writeShort(STREAM_MAGIC_FAST);
+ } else {
+ bout.writeShort(STREAM_MAGIC);
+ }
bout.writeShort(STREAM_VERSION);
}
@@ -648,6 +691,9 @@ public class ObjectOutputStream
* By default, this method writes class descriptors according to the format
* defined in the Object Serialization specification.
*
+ * In fastSerializer mode, we will only write the classname to the stream.
+ * The annotateClass is used to match the resolveClass in readClassDescriptor.
+ *
* <p>Note that this method will only be called if the ObjectOutputStream
* is not using the old serialization stream format (set by calling
* ObjectOutputStream's <code>useProtocolVersion</code> method). If this
@@ -665,7 +711,14 @@ public class ObjectOutputStream
protected void writeClassDescriptor(ObjectStreamClass desc)
throws IOException
{
- desc.writeNonProxy(this);
+ if (useFastSerializer) {
+ writeUTF(desc.getName());
+ // The annotateClass is used to match the resolveClass called in
+ // readClassDescriptor.
+ annotateClass(desc.forClass());
+ } else {
+ desc.writeNonProxy(this);
+ }
}
/**
@@ -1275,9 +1328,21 @@ public class ObjectOutputStream
bout.writeByte(TC_CLASSDESC);
handles.assign(unshared ? null : desc);
+ if (Logging.fastSerLogger != null) {
+ Logging.fastSerLogger.fine(
+ "[Serialize] useFastSerializer:{0}, Class name:{1}, SerialVersionUID:{2}, flags:{3}, protocol:{4}",
+ useFastSerializer, desc.getName(), desc.getSerialVersionUID(), desc.getFlags(this), protocol);
+ }
+
if (protocol == PROTOCOL_VERSION_1) {
// do not invoke class descriptor write hook with old protocol
- desc.writeNonProxy(this);
+ if (useFastSerializer) {
+ // only write name and annotate class when using FastSerializer
+ writeUTF(desc.getName());
+ annotateClass(desc.forClass());
+ } else {
+ desc.writeNonProxy(this);
+ }
} else {
writeClassDescriptor(desc);
}
@@ -1291,7 +1356,9 @@ public class ObjectOutputStream
bout.setBlockDataMode(false);
bout.writeByte(TC_ENDBLOCKDATA);
- writeClassDesc(desc.getSuperDesc(), false);
+ if (!useFastSerializer) {
+ writeClassDesc(desc.getSuperDesc(), false);
+ }
}
/**
diff --git a/jdk/src/share/classes/java/io/ObjectStreamClass.java b/jdk/src/share/classes/java/io/ObjectStreamClass.java
index 64453b25a..fce3c3475 100644
--- a/jdk/src/share/classes/java/io/ObjectStreamClass.java
+++ b/jdk/src/share/classes/java/io/ObjectStreamClass.java
@@ -280,6 +280,40 @@ public class ObjectStreamClass implements Serializable {
return suid.longValue();
}
+ /**
+ * Return the flags for this class described by this descriptor. The flags
+ * means a set of bit masks for ObjectStreamClass, which indicate the status
+ * of SC_WRITE_METHOD, SC_SERIALIZABLE, SC_EXTERNALIZABLE, SC_BLOCK_DATA and
+ * SC_ENUM.
+ *
+ * @param serialStream ObjectOutputStream or ObjectInputStream
+ *
+ * @return the flags for this class described by this descriptor
+ */
+ public byte getFlags(Object serialStream) {
+ byte flags = 0;
+ if (externalizable) {
+ flags |= ObjectStreamConstants.SC_EXTERNALIZABLE;
+ if (serialStream instanceof ObjectOutputStream) {
+ int protocol = ((ObjectOutputStream)serialStream).getProtocolVersion();
+ if (protocol != ObjectStreamConstants.PROTOCOL_VERSION_1) {
+ flags |= ObjectStreamConstants.SC_BLOCK_DATA;
+ }
+ } else if (serialStream instanceof ObjectInputStream) {
+ flags |= ObjectStreamConstants.SC_BLOCK_DATA;
+ }
+ } else if (serializable) {
+ flags |= ObjectStreamConstants.SC_SERIALIZABLE;
+ }
+ if (hasWriteObjectData) {
+ flags |= ObjectStreamConstants.SC_WRITE_METHOD;
+ }
+ if (isEnum) {
+ flags |= ObjectStreamConstants.SC_ENUM;
+ }
+ return flags;
+ }
+
/**
* Return the class in the local VM that this version is mapped to. Null
* is returned if there is no corresponding local class.
@@ -570,6 +604,15 @@ public class ObjectStreamClass implements Serializable {
ObjectStreamClass() {
}
+ /**
+ * Create a blank class descriptor with name. It is only used
+ * in fastSerialize path.
+ * @param name class name
+ */
+ ObjectStreamClass(String name) {
+ this.name = name;
+ }
+
/**
* Creates a PermissionDomain that grants no permission.
*/
@@ -756,6 +799,44 @@ public class ObjectStreamClass implements Serializable {
initialized = true;
}
+ /**
+ * Initializes class descriptor representing a non-proxy class.
+ * Used in fast serialization mode.
+ */
+ void initNonProxyFast(ObjectStreamClass model,
+ ClassNotFoundException resolveEx)
+ {
+ this.cl = model.cl;
+ this.resolveEx = resolveEx;
+ this.superDesc = model.superDesc;
+ name = model.name;
+ this.suid = model.suid;
+ isProxy = false;
+ isEnum = model.isEnum;
+ serializable = model.serializable;
+ externalizable = model.externalizable;
+ hasBlockExternalData = model.hasBlockExternalData;
+ hasWriteObjectData = model.hasWriteObjectData;
+ fields = model.fields;
+ primDataSize = model.primDataSize;
+ numObjFields = model.numObjFields;
+
+ writeObjectMethod = model.writeObjectMethod;
+ readObjectMethod = model.readObjectMethod;
+ readObjectNoDataMethod = model.readObjectNoDataMethod;
+ writeReplaceMethod = model.writeReplaceMethod;
+ readResolveMethod = model.readResolveMethod;
+ if (deserializeEx == null) {
+ deserializeEx = model.deserializeEx;
+ }
+ domains = model.domains;
+ cons = model.cons;
+ fieldRefl = model.fieldRefl;
+ localDesc = model;
+
+ initialized = true;
+ }
+
/**
* Reads non-proxy class descriptor information from given input stream.
* The resulting class descriptor is not fully functional; it can only be
diff --git a/jdk/src/share/classes/java/io/ObjectStreamConstants.java b/jdk/src/share/classes/java/io/ObjectStreamConstants.java
index 23f72b436..59179a6ec 100644
--- a/jdk/src/share/classes/java/io/ObjectStreamConstants.java
+++ b/jdk/src/share/classes/java/io/ObjectStreamConstants.java
@@ -38,6 +38,11 @@ public interface ObjectStreamConstants {
*/
final static short STREAM_MAGIC = (short)0xaced;
+ /**
+ * Magic number that is written to the stream header when using fastserilizer.
+ */
+ static final short STREAM_MAGIC_FAST = (short)0xdeca;
+
/**
* Version number that is written to the stream header.
*/
diff --git a/jdk/src/share/classes/sun/misc/Unsafe.java b/jdk/src/share/classes/sun/misc/Unsafe.java
index 99e465802..92fb01669 100644
--- a/jdk/src/share/classes/sun/misc/Unsafe.java
+++ b/jdk/src/share/classes/sun/misc/Unsafe.java
@@ -433,6 +433,8 @@ public final class Unsafe {
/** @see #putByte(long, byte) */
public native void putDouble(long address, double x);
+ public native boolean getUseFastSerializer();
+
/**
* Fetches a native pointer from a given memory address. If the address is
* zero, or does not point into a block obtained from {@link

View File

@ -0,0 +1,46 @@
diff --git a/jdk/src/share/classes/java/lang/Long.java b/jdk/src/share/classes/java/lang/Long.java
index 58c2cc3ba..7b6e14a97 100644
--- a/jdk/src/share/classes/java/lang/Long.java
+++ b/jdk/src/share/classes/java/lang/Long.java
@@ -812,12 +812,11 @@ public final class Long extends Number implements Comparable<Long> {
static final Long cache[];
static {
-
+ int h = 127;
String longCacheHighPropValue =
sun.misc.VM.getSavedProperty("java.lang.Long.LongCache.high");
if (longCacheHighPropValue != null) {
// high value may be configured by property
- int h = 0;
try {
int i = Integer.parseInt(longCacheHighPropValue);
i = Math.max(i, 127);
@@ -826,21 +825,13 @@ public final class Long extends Number implements Comparable<Long> {
} catch( NumberFormatException nfe) {
// If the property cannot be parsed into an int, ignore it.
}
- high = h;
- low = -h - 1;
- cache = new Long[(high - low) + 1];
- int j = low;
- for(int k = 0; k < cache.length; k++)
- cache[k] = new Long(j++);
-
- } else {
- low = -128;
- high = 127;
- cache = new Long[(high - low) + 1];
- int j = low;
- for(int k = 0; k < cache.length; k++)
- cache[k] = new Long(j++);
}
+ high = h;
+ low = -h - 1;
+ cache = new Long[(high - low) + 1];
+ int j = low;
+ for(int k = 0; k < cache.length; k++)
+ cache[k] = new Long(j++);
}
}

View File

@ -915,7 +915,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r
Name: java-%{javaver}-%{origin}
Version: %{javaver}.%{updatever}.%{buildver}
Release: 0
Release: 2
# java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons
# and this change was brought into RHEL-4. java-1.5.0-ibm packages
# also included the epoch in their virtual provides. This created a
@ -1031,12 +1031,18 @@ Patch89: 8144993-Elide-redundant-memory-barrier-after-AllocationNode.patch
Patch90: 8223504-improve-performance-of-forall-loops-by-better.patch
Patch91: add-vm-option-BoxTypeCachedMax-for-Integer-and-Long-cache.patch
Patch92: 8080289-8040213-8189067-move-the-store-out-of-the-loop.patch
Patch93: fast-serializer-jdk8.patch
Patch94: 8182397-race-in-field-updates.patch
Patch95: 8205921-Optimizing-best-of-2-work-stealing-queue-selection.patch
# 8u265
Patch96: fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch
Patch97: leaf-optimize-in-ParallelScanvageGC.patch
Patch98: 8046294-Generate-the-4-byte-timestamp-randomly.patch
Patch100: 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
Patch102: fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
Patch103: Ddot-intrinsic-implement.patch
Patch104: 8234003-Improve-IndexSet-iteration.patch
Patch105: 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch
#############################################
#
@ -1428,10 +1434,17 @@ pushd %{top_level_dir_name}
%patch90 -p1
%patch91 -p1
%patch92 -p1
%patch93 -p1
%patch94 -p1
%patch95 -p1
%patch96 -p1
%patch97 -p1
%patch98 -p1
%patch100 -p1
%patch102 -p1
%patch103 -p1
%patch104 -p1
%patch105 -p1
popd
@ -2051,6 +2064,19 @@ require "copy_jdk_configs.lua"
%endif
%changelog
* Mon Sep 1 2020 jdkboy <guoge1@huawei.com> - 1:1.8.0.265-b10.2
- Remove fast-serializer-jdk8.patch
* Tue Aug 29 2020 jdkboy <guoge1@huawei.com> - 1:1.8.0.265-b10.1
- Add leaf-optimize-in-ParallelScanvageGC.patch
- Add 8046294-Generate-the-4-byte-timestamp-randomly.patch
- Add 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
- Add fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
- Add Ddot-intrinsic-implement.patch
- Add 8234003-Improve-IndexSet-iteration.patch
- Add 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch
- Remove prohibition-of-irreducible-loop-in-mergers.patch
* Tue Aug 25 2020 noah <hedongbo@huawei.com> - 1:1.8.0.265-b10.0
- Update to aarch64-shenandoah-jdk8u-8u265-b01
- add fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch

View File

@ -0,0 +1,210 @@
diff --git a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
index fba64e15f..1c92314f9 100644
--- a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
+++ b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
@@ -131,6 +131,14 @@ inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t*
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}
+inline intptr_t Atomic::relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value)
+{
+ intptr_t value = compare_value;
+ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ return value;
+}
+
inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value)
{
return (void *) cmpxchg_ptr((intptr_t) exchange_value,
diff --git a/hotspot/src/share/vm/classfile/classFileParser.cpp b/hotspot/src/share/vm/classfile/classFileParser.cpp
index 07d07e4f2..f001a94e7 100644
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp
@@ -4393,6 +4393,11 @@ void ClassFileParser::fill_oop_maps(instanceKlassHandle k,
OopMapBlock* this_oop_map = k->start_of_nonstatic_oop_maps();
const InstanceKlass* const super = k->superklass();
const unsigned int super_count = super ? super->nonstatic_oop_map_count() : 0;
+
+ const bool super_is_gc_leaf = super ? super->oop_is_gc_leaf() : true;
+ bool this_is_gc_leaf = super_is_gc_leaf && (nonstatic_oop_map_count == 0);
+ k->set_oop_is_gc_leaf(this_is_gc_leaf);
+
if (super_count > 0) {
// Copy maps from superklass
OopMapBlock* super_oop_map = super->start_of_nonstatic_oop_maps();
diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
index b2de74d41..dde9ac426 100644
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
@@ -49,7 +49,12 @@ inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) {
}
oopDesc::encode_store_heap_oop_not_null(p, o);
} else {
- push_depth(p);
+ // leaf object copy in advanced, reduce cost of push and pop
+ if (!o->klass()->oop_is_gc_leaf()) {
+ push_depth(p);
+ } else {
+ PSScavenge::copy_and_push_safe_barrier<T, false>(this, p);
+ }
}
}
}
@@ -171,7 +176,15 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) {
Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size);
// Now we have to CAS in the header.
+#ifdef AARCH64
+ // CAS with memory fence cost a lot within copy_to_survivor_space on aarch64.
+ // To minimize the cost, we use a normal CAS to do object forwarding, plus a
+ // memory fence only upon CAS succeeds. To further reduce the fence insertion,
+ // we can skip the fence insertion for leaf objects (objects don't have reference fields).
+ if (o->relax_cas_forward_to(new_obj, test_mark)) {
+#else
if (o->cas_forward_to(new_obj, test_mark)) {
+#endif
// We won any races, we "own" this object.
assert(new_obj == o->forwardee(), "Sanity");
@@ -195,10 +208,13 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) {
push_depth(masked_o);
TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
} else {
- // we'll just push its contents
- new_obj->push_contents(this);
+ // leaf object don't have contents, never need push_contents
+ if (!o->klass()->oop_is_gc_leaf()) {
+ // we'll just push its contents
+ new_obj->push_contents(this);
+ }
}
- } else {
+ } else {
// We lost, someone else "owns" this object
guarantee(o->is_forwarded(), "Object must be forwarded if the cas failed.");
diff --git a/hotspot/src/share/vm/oops/klass.cpp b/hotspot/src/share/vm/oops/klass.cpp
index 7fda7ce62..6e8f9acde 100644
--- a/hotspot/src/share/vm/oops/klass.cpp
+++ b/hotspot/src/share/vm/oops/klass.cpp
@@ -207,6 +207,8 @@ Klass::Klass() {
clear_modified_oops();
clear_accumulated_modified_oops();
_shared_class_path_index = -1;
+
+ set_oop_is_gc_leaf(false);
}
jint Klass::array_layout_helper(BasicType etype) {
diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp
index 22ae48f5c..4aea54795 100644
--- a/hotspot/src/share/vm/oops/klass.hpp
+++ b/hotspot/src/share/vm/oops/klass.hpp
@@ -177,6 +177,8 @@ class Klass : public Metadata {
jbyte _modified_oops; // Card Table Equivalent (YC/CMS support)
jbyte _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
+ bool _is_gc_leaf;
+
private:
// This is an index into FileMapHeader::_classpath_entry_table[], to
// associate this class with the JAR file where it's loaded from during
@@ -569,6 +571,9 @@ protected:
oop_is_typeArray_slow()); }
#undef assert_same_query
+ void set_oop_is_gc_leaf(bool is_gc_leaf) { _is_gc_leaf = is_gc_leaf; }
+ inline bool oop_is_gc_leaf() const { return _is_gc_leaf; }
+
// Access flags
AccessFlags access_flags() const { return _access_flags; }
void set_access_flags(AccessFlags flags) { _access_flags = flags; }
diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp
index a703a54ef..41a7bce4d 100644
--- a/hotspot/src/share/vm/oops/oop.hpp
+++ b/hotspot/src/share/vm/oops/oop.hpp
@@ -76,6 +76,9 @@ class oopDesc {
void release_set_mark(markOop m);
markOop cas_set_mark(markOop new_mark, markOop old_mark);
+#ifdef AARCH64
+ markOop relax_cas_set_mark(markOop new_mark, markOop old_mark);
+#endif
// Used only to re-initialize the mark word (e.g., of promoted
// objects during a GC) -- requires a valid klass pointer
@@ -317,6 +320,10 @@ class oopDesc {
void forward_to(oop p);
bool cas_forward_to(oop p, markOop compare);
+#ifdef AARCH64
+ bool relax_cas_forward_to(oop p, markOop compare);
+#endif
+
#if INCLUDE_ALL_GCS
// Like "forward_to", but inserts the forwarding pointer atomically.
// Exactly one thread succeeds in inserting the forwarding pointer, and
diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp
index d4c4d75c0..c3abdb128 100644
--- a/hotspot/src/share/vm/oops/oop.inline.hpp
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp
@@ -76,6 +76,12 @@ inline markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) {
return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark);
}
+#ifdef AARCH64
+inline markOop oopDesc::relax_cas_set_mark(markOop new_mark, markOop old_mark) {
+ return (markOop)Atomic::relax_cmpxchg_ptr((intptr_t)new_mark, (volatile intptr_t*)&_mark, (intptr_t)old_mark);
+}
+#endif
+
inline Klass* oopDesc::klass() const {
if (UseCompressedClassPointers) {
return Klass::decode_klass_not_null(_metadata._compressed_klass);
@@ -715,6 +721,30 @@ inline bool oopDesc::cas_forward_to(oop p, markOop compare) {
return cas_set_mark(m, compare) == compare;
}
+#ifdef AARCH64
+inline bool oopDesc::relax_cas_forward_to(oop p, markOop compare) {
+ assert(check_obj_alignment(p),
+ "forwarding to something not aligned");
+ assert(Universe::heap()->is_in_reserved(p),
+ "forwarding to something not in heap");
+ markOop m = markOopDesc::encode_pointer_as_mark(p);
+ assert(m->decode_pointer() == p, "encoding must be reversable");
+ markOop old_markoop = relax_cas_set_mark(m, compare);
+ // If CAS succeeded, we must ensure the copy visible to threads reading the forwardee.
+ // (We might delay the fence insertion till pushing contents to task stack as other threads
+ // only need to touch the copied object after stolen the task.)
+ if (old_markoop == compare) {
+ // Once the CAS succeeds, leaf object never needs to be visible to other threads (finished
+ // collection by current thread), so we can save the fence.
+ if (!p->klass()->oop_is_gc_leaf()) {
+ OrderAccess::fence();
+ }
+ return true;
+ }
+ return false;
+}
+#endif
+
// Note that the forwardee is not the same thing as the displaced_mark.
// The forwardee is used when copying during scavenge and mark-sweep.
// It does need to clear the low two locking- and GC-related bits.
diff --git a/hotspot/src/share/vm/runtime/atomic.hpp b/hotspot/src/share/vm/runtime/atomic.hpp
index 9ca5fce97..015178b61 100644
--- a/hotspot/src/share/vm/runtime/atomic.hpp
+++ b/hotspot/src/share/vm/runtime/atomic.hpp
@@ -94,6 +94,10 @@ class Atomic : AllStatic {
unsigned int compare_value);
inline static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
+#ifdef AARCH64
+ inline static intptr_t relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
+#endif
+
inline static void* cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value);
};

View File

@ -1,27 +0,0 @@
From 34712f6bbc3c2c664ee641c78d4a2f8cfe427880 Mon Sep 17 00:00:00 2001
Date: Fri, 28 Feb 2020 15:17:44 +0000
Subject: [PATCH] prohibition of irreducible loop in mergers
Summary: C2Compiler: irreducible loop should not enter merge_many_backedges
LLT: NA
Bug url: NA
---
hotspot/src/share/vm/opto/loopnode.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp
index e2c0645cf8..bbb2e2bf98 100644
--- a/hotspot/src/share/vm/opto/loopnode.cpp
+++ b/hotspot/src/share/vm/opto/loopnode.cpp
@@ -1542,7 +1542,7 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
// If I am a shared header (multiple backedges), peel off the many
// backedges into a private merge point and use the merge point as
// the one true backedge.
- if( _head->req() > 3 ) {
+ if( _head->req() > 3 && !_irreducible) {
// Merge the many backedges into a single backedge but leave
// the hottest backedge as separate edge for the following peel.
merge_many_backedges( phase );
--
2.12.3