!26 Add several enhancement patches & Remove fast-serializer-jdk8.patch

Merge pull request !26 from jdkboy/master
2020-09-02 09:08:48 +08:00 · 2020-09-02 09:08:48 +08:00 · d8648983fa
commit d8648983fa
parent 83c63fa05d 198e1d750a
10 changed files with 2623 additions and 709 deletions
--- a/8046294-Generate-the-4-byte-timestamp-randomly.patch
+++ b/8046294-Generate-the-4-byte-timestamp-randomly.patch
@ -0,0 +1,87 @@
+diff --git a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
+index 5f414c408..ce27f0df4 100644
+--- a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
+++ b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1996, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+@@ -41,21 +41,8 @@ final class RandomCookie {
+     byte random_bytes[];  // exactly 32 bytes
+ 
+     RandomCookie(SecureRandom generator) {
+-        long temp = System.currentTimeMillis() / 1000;
+-        int gmt_unix_time;
+-        if (temp < Integer.MAX_VALUE) {
+-            gmt_unix_time = (int) temp;
+-        } else {
+-            gmt_unix_time = Integer.MAX_VALUE;          // Whoops!
+-        }
+-
+         random_bytes = new byte[32];
+         generator.nextBytes(random_bytes);
+-
+-        random_bytes[0] = (byte)(gmt_unix_time >> 24);
+-        random_bytes[1] = (byte)(gmt_unix_time >> 16);
+-        random_bytes[2] = (byte)(gmt_unix_time >>  8);
+-        random_bytes[3] = (byte)gmt_unix_time;
+     }
+ 
+     RandomCookie(HandshakeInStream m) throws IOException {
+@@ -68,22 +55,15 @@ final class RandomCookie {
+     }
+ 
+     void print(PrintStream s) {
+-        int i, gmt_unix_time;
+-
+-        gmt_unix_time = random_bytes[0] << 24;
+-        gmt_unix_time += random_bytes[1] << 16;
+-        gmt_unix_time += random_bytes[2] << 8;
+-        gmt_unix_time += random_bytes[3];
+-
+-        s.print("GMT: " + gmt_unix_time + " ");
+-        s.print("bytes = { ");
+-
+-        for (i = 4; i < 32; i++) {
+-            if (i != 4) {
+-                s.print(", ");
+        s.print("random_bytes = {");
+        for (int i = 0; i < 32; i++) {
+            int k = random_bytes[i] & 0xFF;
+            if (i != 0) {
+                s.print(' ');
+             }
+-            s.print(random_bytes[i] & 0x0ff);
+            s.print(Utilities.hexDigits[k >>> 4]);
+            s.print(Utilities.hexDigits[k & 0xf]);
+         }
+-        s.println(" }");
+        s.println("}");
+     }
+ }
+diff --git a/jdk/src/share/classes/sun/security/ssl/Utilities.java b/jdk/src/share/classes/sun/security/ssl/Utilities.java
+index aefb02c9a..9b267f6e1 100644
+--- a/jdk/src/share/classes/sun/security/ssl/Utilities.java
+++ b/jdk/src/share/classes/sun/security/ssl/Utilities.java
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+@@ -33,6 +33,11 @@ import sun.net.util.IPAddressUtil;
+  * A utility class to share the static methods.
+  */
+ final class Utilities {
+    /**
+     * hex digits
+     */
+    static final char[] hexDigits = "0123456789ABCDEF".toCharArray();
+
+     /**
+      * Puts {@code hostname} into the {@code serverNames} list.
+      * <P>
--- a/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
+++ b/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
@ -0,0 +1,28 @@
+diff --git a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
+index 65a441240..1e534d3da 100644
+--- a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
+@@ -71,10 +71,20 @@ bool frame::safe_for_sender(JavaThread *thread) {
+     return false;
+   }
+ 
+-  // unextended sp must be within the stack and above or equal sp
+-  bool unextended_sp_safe = (unextended_sp < thread->stack_base()) &&
+-                            (unextended_sp >= sp);
+  // When we are running interpreted code the machine stack pointer, SP, is
+  // set low enough so that the Java expression stack can grow and shrink
+  // without ever exceeding the machine stack bounds.  So, ESP >= SP.
+ 
+  // When we call out of an interpreted method, SP is incremented so that
+  // the space between SP and ESP is removed.  The SP saved in the callee's
+  // frame is the SP *before* this increment.  So, when we walk a stack of
+  // interpreter frames the sender's SP saved in a frame might be less than
+  // the SP at the point of call.
+
+  // So unextended sp must be within the stack, but we need not check
+  // that unextended sp >= sp.
+
+  bool unextended_sp_safe = (unextended_sp < thread->stack_base());
+   if (!unextended_sp_safe) {
+     return false;
+   }
--- a/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch
+++ b/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch
@ -0,0 +1,558 @@
+diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp
+index de6d443cd..abbd4449f 100644
+--- a/hotspot/src/share/vm/opto/chaitin.hpp
+++ b/hotspot/src/share/vm/opto/chaitin.hpp
+@@ -111,9 +111,9 @@ public:
+     _msize_valid=1;
+     if (_is_vector) {
+       assert(!_fat_proj, "sanity");
+-      _mask.verify_sets(_num_regs);
+      assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
+     } else if (_num_regs == 2 && !_fat_proj) {
+-      _mask.verify_pairs();
+      assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
+     }
+ #endif
+   }
+diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp
+index 352ccfb9d..d92f09eb6 100644
+--- a/hotspot/src/share/vm/opto/regmask.cpp
+++ b/hotspot/src/share/vm/opto/regmask.cpp
+@@ -74,7 +74,8 @@ int find_lowest_bit( uint32 mask ) {
+ }
+ 
+ // Find highest 1, or return 32 if empty
+-int find_hihghest_bit( uint32 mask ) {
+int find_highest_bit( uint32 mask ) {
+  assert(mask != 0, "precondition");
+   int n = 0;
+   if( mask > 0xffff ) {
+     mask >>= 16;
+@@ -167,13 +168,14 @@ OptoReg::Name RegMask::find_first_pair() const {
+ //------------------------------ClearToPairs-----------------------------------
+ // Clear out partial bits; leave only bit pairs
+ void RegMask::clear_to_pairs() {
+-  for( int i = 0; i < RM_SIZE; i++ ) {
+  assert(valid_watermarks(), "sanity");
+  for( int i = _lwm; i <= _hwm; i++ ) { // _hwm is inclusive: the highest word must be processed too
+     int bits = _A[i];
+     bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
+     bits |= (bits>>1);          // Smear 1 hi-bit into a pair
+     _A[i] = bits;
+   }
+-  verify_pairs();
+  assert(is_aligned_pairs(), "mask is not aligned, adjacent pairs");
+ }
+ 
+ //------------------------------SmearToPairs-----------------------------------
+@@ -188,10 +190,14 @@ void RegMask::smear_to_pairs() {
+   verify_pairs();
+ }
+ 
+-//------------------------------is_aligned_pairs-------------------------------
+bool RegMask::is_misaligned_pair() const {
+  return Size() == 2 && !is_aligned_pairs();
+}
+
+ bool RegMask::is_aligned_pairs() const {
+   // Assert that the register mask contains only bit pairs.
+-  for( int i = 0; i < RM_SIZE; i++ ) {
+  assert(valid_watermarks(), "sanity");
+  for( int i = _lwm; i <= _hwm; i++ ) { // _hwm is inclusive: check the highest word as well
+     int bits = _A[i];
+     while( bits ) {             // Check bits for pairing
+       int bit = bits & -bits;   // Extract low bit
+@@ -206,39 +212,28 @@ bool RegMask::is_aligned_pairs() const {
+   return true;
+ }
+ 
+-//------------------------------is_bound1--------------------------------------
+-// Return TRUE if the mask contains a single bit
+-int RegMask::is_bound1() const {
+-  if( is_AllStack() ) return false;
+-  int bit = -1;                 // Set to hold the one bit allowed
+-  for( int i = 0; i < RM_SIZE; i++ ) {
+-    if( _A[i] ) {               // Found some bits
+-      if( bit != -1 ) return false; // Already had bits, so fail
+-      bit = _A[i] & -_A[i];     // Extract 1 bit from mask
+-      if( bit != _A[i] ) return false; // Found many bits, so fail
+-    }
+-  }
+-  // True for both the empty mask and for a single bit
+-  return true;
+bool RegMask::is_bound1() const {
+  if (is_AllStack()) return false;
+  return Size() == 1;
+ }
+ 
+ //------------------------------is_bound2--------------------------------------
+ // Return TRUE if the mask contains an adjacent pair of bits and no other bits.
+-int RegMask::is_bound_pair() const {
+bool RegMask::is_bound_pair() const {
+   if( is_AllStack() ) return false;
+-
+  assert(valid_watermarks(), "sanity");
+   int bit = -1;                 // Set to hold the one bit allowed
+-  for( int i = 0; i < RM_SIZE; i++ ) {
+-    if( _A[i] ) {               // Found some bits
+-      if( bit != -1 ) return false; // Already had bits, so fail
+-      bit = _A[i] & -(_A[i]);   // Extract 1 bit from mask
+-      if( (bit << 1) != 0 ) {   // Bit pair stays in same word?
+  for( int i = _lwm; i <= _hwm; i++ ) {
+    if( _A[i] ) {                 // Found some bits
+      if( bit != -1) return false; // Already had bits, so fail
+      bit = _A[i] & -(_A[i]);      // Extract 1 bit from mask
+      if( (bit << 1) != 0 ) {       // Bit pair stays in same word?
+         if( (bit | (bit<<1)) != _A[i] )
+-          return false;         // Require adjacent bit pair and no more bits
+-      } else {                  // Else its a split-pair case
+          return false;            // Require adjacent bit pair and no more bits
+      } else {                     // Else its a split-pair case
+         if( bit != _A[i] ) return false; // Found many bits, so fail
+-        i++;                    // Skip iteration forward
+-        if( i >= RM_SIZE || _A[i] != 1 )
+        i++;                       // Skip iteration forward
+        if( i > _hwm || _A[i] != 1 )
+           return false; // Require 1 lo bit in next word
+       }
+     }
+@@ -247,31 +242,44 @@ int RegMask::is_bound_pair() const {
+   return true;
+ }
+ 
+// Test for a single adjacent set of ideal register's size.
+bool RegMask::is_bound(uint ireg) const {
+  if (is_vector(ireg)) {
+    if (is_bound_set(num_registers(ireg)))
+      return true;
+  } else if (is_bound1() || is_bound_pair()) {
+    return true;
+  }
+  return false;
+}
+
+
+
+ static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
+-//------------------------------find_first_set---------------------------------
+
+ // Find the lowest-numbered register set in the mask.  Return the
+ // HIGHEST register number in the set, or BAD if no sets.
+ // Works also for size 1.
+ OptoReg::Name RegMask::find_first_set(const int size) const {
+-  verify_sets(size);
+-  for (int i = 0; i < RM_SIZE; i++) {
+  assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
+     if (_A[i]) {                // Found some bits
+-      int bit = _A[i] & -_A[i]; // Extract low bit
+       // Convert to bit number, return hi bit in pair
+-      return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
+      return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(_A[i])+(size-1));
+     }
+   }
+   return OptoReg::Bad;
+ }
+ 
+-//------------------------------clear_to_sets----------------------------------
+ // Clear out partial bits; leave only aligned adjacent bit pairs
+ void RegMask::clear_to_sets(const int size) {
+   if (size == 1) return;
+   assert(2 <= size && size <= 8, "update low bits table");
+   assert(is_power_of_2(size), "sanity");
+  assert(valid_watermarks(), "sanity");
+   int low_bits_mask = low_bits[size>>2];
+-  for (int i = 0; i < RM_SIZE; i++) {
+  for (int i = _lwm; i <= _hwm; i++) {
+     int bits = _A[i];
+     int sets = (bits & low_bits_mask);
+     for (int j = 1; j < size; j++) {
+@@ -286,17 +294,17 @@ void RegMask::clear_to_sets(const int size) {
+     }
+     _A[i] = sets;
+   }
+-  verify_sets(size);
+  assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+ }
+ 
+-//------------------------------smear_to_sets----------------------------------
+ // Smear out partial bits to aligned adjacent bit sets
+ void RegMask::smear_to_sets(const int size) {
+   if (size == 1) return;
+   assert(2 <= size && size <= 8, "update low bits table");
+   assert(is_power_of_2(size), "sanity");
+  assert(valid_watermarks(), "sanity");
+   int low_bits_mask = low_bits[size>>2];
+-  for (int i = 0; i < RM_SIZE; i++) {
+  for (int i = _lwm; i <= _hwm; i++) {
+     int bits = _A[i];
+     int sets = 0;
+     for (int j = 0; j < size; j++) {
+@@ -312,17 +320,17 @@ void RegMask::smear_to_sets(const int size) {
+     }
+     _A[i] = sets;
+   }
+-  verify_sets(size);
+  assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+ }
+ 
+-//------------------------------is_aligned_set--------------------------------
+// Test that the register mask contains only aligned adjacent bit sets.
+ bool RegMask::is_aligned_sets(const int size) const {
+   if (size == 1) return true;
+   assert(2 <= size && size <= 8, "update low bits table");
+   assert(is_power_of_2(size), "sanity");
+   int low_bits_mask = low_bits[size>>2];
+-  // Assert that the register mask contains only bit sets.
+-  for (int i = 0; i < RM_SIZE; i++) {
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
+     int bits = _A[i];
+     while (bits) {              // Check bits for pairing
+       int bit = bits & -bits;   // Extract low bit
+@@ -339,14 +347,14 @@ bool RegMask::is_aligned_sets(const int size) const {
+   return true;
+ }
+ 
+-//------------------------------is_bound_set-----------------------------------
+ // Return TRUE if the mask contains one adjacent set of bits and no other bits.
+ // Works also for size 1.
+ int RegMask::is_bound_set(const int size) const {
+   if( is_AllStack() ) return false;
+   assert(1 <= size && size <= 8, "update low bits table");
+  assert(valid_watermarks(), "sanity");
+   int bit = -1;                 // Set to hold the one bit allowed
+-  for (int i = 0; i < RM_SIZE; i++) {
+  for (int i = _lwm; i <= _hwm; i++) {
+     if (_A[i] ) {               // Found some bits
+       if (bit != -1)
+        return false;            // Already had bits, so fail
+@@ -364,7 +372,7 @@ int RegMask::is_bound_set(const int size) const {
+         int set = bit>>24;
+         set = set & -set; // Remove sign extension.
+         set = (((set << size) - 1) >> 8);
+-        if (i >= RM_SIZE || _A[i] != set)
+	if (i > _hwm || _A[i] != set)
+           return false; // Require expected low bits in next word
+       }
+     }
+@@ -373,7 +381,6 @@ int RegMask::is_bound_set(const int size) const {
+   return true;
+ }
+ 
+-//------------------------------is_UP------------------------------------------
+ // UP means register only, Register plus stack, or stack only is DOWN
+ bool RegMask::is_UP() const {
+   // Quick common case check for DOWN (any stack slot is legal)
+@@ -386,22 +393,22 @@ bool RegMask::is_UP() const {
+   return true;
+ }
+ 
+-//------------------------------Size-------------------------------------------
+ // Compute size of register mask in bits
+ uint RegMask::Size() const {
+   extern uint8 bitsInByte[256];
+   uint sum = 0;
+-  for( int i = 0; i < RM_SIZE; i++ )
+  assert(valid_watermarks(), "sanity");
+  for( int i = _lwm; i <= _hwm; i++ ) {
+     sum +=
+       bitsInByte[(_A[i]>>24) & 0xff] +
+       bitsInByte[(_A[i]>>16) & 0xff] +
+       bitsInByte[(_A[i]>> 8) & 0xff] +
+       bitsInByte[ _A[i]      & 0xff];
+  }
+   return sum;
+ }
+ 
+ #ifndef PRODUCT
+-//------------------------------print------------------------------------------
+ void RegMask::dump(outputStream *st) const {
+   st->print("[");
+   RegMask rm = *this;           // Structure copy into local temp
+diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp
+index 5ceebb3fb..6cef16ad7 100644
+--- a/hotspot/src/share/vm/opto/regmask.hpp
+++ b/hotspot/src/share/vm/opto/regmask.hpp
+@@ -44,27 +44,12 @@
+ # include "adfiles/adGlobals_ppc_64.hpp"
+ #endif
+ 
+-// Some fun naming (textual) substitutions:
+-//
+-// RegMask::get_low_elem() ==> RegMask::find_first_elem()
+-// RegMask::Special        ==> RegMask::Empty
+-// RegMask::_flags         ==> RegMask::is_AllStack()
+-// RegMask::operator<<=()  ==> RegMask::Insert()
+-// RegMask::operator>>=()  ==> RegMask::Remove()
+-// RegMask::Union()        ==> RegMask::OR
+-// RegMask::Inter()        ==> RegMask::AND
+-//
+-// OptoRegister::RegName   ==> OptoReg::Name
+-//
+-// OptoReg::stack0()       ==> _last_Mach_Reg  or ZERO in core version
+-//
+-// numregs in chaitin      ==> proper degree in chaitin
+ 
+ //-------------Non-zero bit search methods used by RegMask---------------------
+ // Find lowest 1, or return 32 if empty
+ int find_lowest_bit( uint32 mask );
+ // Find highest 1, or return 32 if empty
+-int find_hihghest_bit( uint32 mask );
+int find_highest_bit( uint32 mask );
+ 
+ //------------------------------RegMask----------------------------------------
+ // The ADL file describes how to print the machine-specific registers, as well
+@@ -97,6 +82,12 @@ class RegMask VALUE_OBJ_CLASS_SPEC {
+ 
+ public:
+   enum { CHUNK_SIZE = RM_SIZE*_WordBits };
+  // The low and high water marks represent the lowest and highest word
+  // that might contain set register mask bits, respectively. We guarantee
+  // that there are no bits in words outside this range, but any word at
+  // and between the two marks can still be 0.
+  int _lwm;
+  int _hwm;
+ 
+   // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
+   // Also, consider the maximum alignment size for a normally allocated
+@@ -126,13 +117,21 @@ public:
+ #   define BODY(I) _A[I] = a##I;
+     FORALL_BODY
+ #   undef BODY
+    _lwm = 0;
+    _hwm = RM_SIZE - 1;
+    while (_hwm > 0 && _A[_hwm] == 0) _hwm--;
+    while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++;
+    assert(valid_watermarks(), "post-condition");
+   }
+ 
+   // Handy copying constructor
+   RegMask( RegMask *rm ) {
+-#   define BODY(I) _A[I] = rm->_A[I];
+-    FORALL_BODY
+-#   undef BODY
+    _hwm = rm->_hwm;
+    _lwm = rm->_lwm;
+    for (int i = 0; i < RM_SIZE; i++) {
+      _A[i] = rm->_A[i];
+    }
+    assert(valid_watermarks(), "post-condition");
+   }
+ 
+   // Construct an empty mask
+@@ -162,30 +161,36 @@ public:
+ 
+   // Test for being a not-empty mask.
+   int is_NotEmpty( ) const {
+    assert(valid_watermarks(), "sanity");
+     int tmp = 0;
+-#   define BODY(I) tmp |= _A[I];
+-    FORALL_BODY
+-#   undef BODY
+    for (int i = _lwm; i <= _hwm; i++) {
+      tmp |= _A[i];
+    }
+     return tmp;
+   }
+ 
+   // Find lowest-numbered register from mask, or BAD if mask is empty.
+   OptoReg::Name find_first_elem() const {
+-    int base, bits;
+-#   define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
+-    FORALL_BODY
+-#   undef BODY
+-      { base = OptoReg::Bad; bits = 1<<0; }
+-    return OptoReg::Name(base + find_lowest_bit(bits));
+    assert(valid_watermarks(), "sanity");
+    for (int i = _lwm; i <= _hwm; i++) {
+      int bits = _A[i];
+      if (bits) {
+        return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits));
+      }
+    }
+    return OptoReg::Name(OptoReg::Bad);
+   }
+
+   // Get highest-numbered register from mask, or BAD if mask is empty.
+   OptoReg::Name find_last_elem() const {
+-    int base, bits;
+-#   define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
+-    FORALL_BODY
+-#   undef BODY
+-      { base = OptoReg::Bad; bits = 1<<0; }
+-    return OptoReg::Name(base + find_hihghest_bit(bits));
+    assert(valid_watermarks(), "sanity");
+    for (int i = _hwm; i >= _lwm; i--) {
+      int bits = _A[i];
+      if (bits) {
+        return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits));
+      }
+    }
+    return OptoReg::Name(OptoReg::Bad);
+   }
+ 
+   // Find the lowest-numbered register pair in the mask.  Return the
+@@ -199,25 +204,34 @@ public:
+   void smear_to_pairs();
+   // Verify that the mask contains only aligned adjacent bit pairs
+   void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
+
+#ifdef ASSERT
+  // Verify watermarks are sane, i.e., within bounds and that no
+  // register words below or above the watermarks have bits set.
+  bool valid_watermarks() const {
+    assert(_hwm >= 0 && _hwm < RM_SIZE, err_msg("_hwm out of range: %d", _hwm));
+    assert(_lwm >= 0 && _lwm < RM_SIZE, err_msg("_lwm out of range: %d", _lwm));
+    for (int i = 0; i < _lwm; i++) {
+      assert(_A[i] == 0, err_msg("_lwm too high: %d regs at: %d", _lwm, i));
+    }
+    for (int i = _hwm + 1; i < RM_SIZE; i++) {
+      assert(_A[i] == 0, err_msg("_hwm too low: %d regs at: %d", _hwm, i));
+    }
+    return true;
+  }
+#endif // ASSERT
+
+   // Test that the mask contains only aligned adjacent bit pairs
+   bool is_aligned_pairs() const;
+ 
+   // mask is a pair of misaligned registers
+-  bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
+  bool is_misaligned_pair() const;
+   // Test for single register
+-  int is_bound1() const;
+  bool is_bound1() const;
+   // Test for a single adjacent pair
+-  int is_bound_pair() const;
+  bool is_bound_pair() const;
+   // Test for a single adjacent set of ideal register's size.
+-  int is_bound(uint ireg) const {
+-    if (is_vector(ireg)) {
+-      if (is_bound_set(num_registers(ireg)))
+-        return true;
+-    } else if (is_bound1() || is_bound_pair()) {
+-      return true;
+-    }
+-    return false;
+-  }
+  bool is_bound(uint ireg) const;
+ 
+   // Find the lowest-numbered register set in the mask.  Return the
+   // HIGHEST register number in the set, or BAD if no sets.
+@@ -228,8 +242,6 @@ public:
+   void clear_to_sets(const int size);
+   // Smear out partial bits to aligned adjacent bit sets.
+   void smear_to_sets(const int size);
+-  // Verify that the mask contains only aligned adjacent bit sets
+-  void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
+   // Test that the mask contains only aligned adjacent bit sets
+   bool is_aligned_sets(const int size) const;
+ 
+@@ -244,11 +256,14 @@ public:
+ 
+   // Fast overlap test.  Non-zero if any registers in common.
+   int overlap( const RegMask &rm ) const {
+-    return
+-#   define BODY(I) (_A[I] & rm._A[I]) |
+-    FORALL_BODY
+-#   undef BODY
+-    0 ;
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    int hwm = MIN2(_hwm, rm._hwm);
+    int lwm = MAX2(_lwm, rm._lwm);
+    int result = 0;
+    for (int i = lwm; i <= hwm; i++) {
+      result |= _A[i] & rm._A[i];
+    }
+    return result; 
+   }
+ 
+   // Special test for register pressure based splitting
+@@ -257,22 +272,29 @@ public:
+ 
+   // Clear a register mask
+   void Clear( ) {
+-#   define BODY(I) _A[I] = 0;
+-    FORALL_BODY
+-#   undef BODY
+    _lwm = RM_SIZE - 1;
+    _hwm = 0;
+    memset(_A, 0, sizeof(int)*RM_SIZE);
+    assert(valid_watermarks(), "sanity");
+   }
+ 
+   // Fill a register mask with 1's
+   void Set_All( ) {
+-#   define BODY(I) _A[I] = -1;
+-    FORALL_BODY
+-#   undef BODY
+    _lwm = 0;
+    _hwm = RM_SIZE - 1;
+    memset(_A, 0xFF, sizeof(int)*RM_SIZE);
+    assert(valid_watermarks(), "sanity");
+   }
+ 
+   // Insert register into mask
+   void Insert( OptoReg::Name reg ) {
+-    assert( reg < CHUNK_SIZE, "" );
+-    _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
+    assert(reg < CHUNK_SIZE, "sanity");
+    assert(valid_watermarks(), "pre-condition");
+    int index = reg>>_LogWordBits;
+    if (index > _hwm) _hwm = index;
+    if (index < _lwm) _lwm = index;
+    _A[index] |= (1<<(reg&(_WordBits-1)));
+    assert(valid_watermarks(), "post-condition");
+   }
+ 
+   // Remove register from mask
+@@ -283,23 +305,38 @@ public:
+ 
+   // OR 'rm' into 'this'
+   void OR( const RegMask &rm ) {
+-#   define BODY(I) this->_A[I] |= rm._A[I];
+-    FORALL_BODY
+-#   undef BODY
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    // OR widens the live range
+    if (_lwm > rm._lwm) _lwm = rm._lwm;
+    if (_hwm < rm._hwm) _hwm = rm._hwm;
+    for (int i = _lwm; i <= _hwm; i++) {
+      _A[i] |= rm._A[i];
+    }
+    assert(valid_watermarks(), "sanity");
+   }
+ 
+   // AND 'rm' into 'this'
+   void AND( const RegMask &rm ) {
+-#   define BODY(I) this->_A[I] &= rm._A[I];
+-    FORALL_BODY
+-#   undef BODY
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    // Do not evaluate words outside the current watermark range, as they are
+    // already zero and an &= would not change that
+    for (int i = _lwm; i <= _hwm; i++) {
+      _A[i] &= rm._A[i];
+    }
+    // Narrow the watermarks if &rm spans a narrower range.
+    // Update after to ensure non-overlapping words are zeroed out.
+    if (_lwm < rm._lwm) _lwm = rm._lwm;
+    if (_hwm > rm._hwm) _hwm = rm._hwm;
+   }
+ 
+   // Subtract 'rm' from 'this'
+   void SUBTRACT( const RegMask &rm ) {
+-#   define BODY(I) _A[I] &= ~rm._A[I];
+-    FORALL_BODY
+-#   undef BODY
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    int hwm = MIN2(_hwm, rm._hwm);
+    int lwm = MAX2(_lwm, rm._lwm);
+    for (int i = lwm; i <= hwm; i++) {
+      _A[i] &= ~rm._A[i];
+    } 
+   }
+ 
+   // Compute size of register mask: number of bits
--- a/8234003-Improve-IndexSet-iteration.patch
+++ b/8234003-Improve-IndexSet-iteration.patch
--- a/Ddot-intrinsic-implement.patch
+++ b/Ddot-intrinsic-implement.patch
@ -0,0 +1,479 @@
+diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+index 1e9b1cb91..c0fd37d05 100644
+--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+@@ -2061,6 +2061,14 @@ public:
+     ld_st(Vt, T, a, op1, op2);						\
+   }
+ 
+  void ld1_d(FloatRegister Vt, int index, const Address &a) {
+    starti;
+    assert(index == 0 || index == 1, "Index must be 0 or 1 for Vx.2D");
+    f(0, 31), f(index & 1, 30);
+    f(0b001101110, 29, 21), rf(a.index(), 16), f(0b1000, 15, 12);
+    f(0b01, 11, 10), rf(a.base(), 5), rf(Vt, 0);
+  }
+
+   INSN1(ld1,  0b001100010, 0b0111);
+   INSN2(ld1,  0b001100010, 0b1010);
+   INSN3(ld1,  0b001100010, 0b0110);
+@@ -2186,6 +2194,13 @@ public:
+ 
+ #undef INSN
+ 
+  void faddp_d(FloatRegister Vd, FloatRegister Vn) {
+    starti;
+    f(0b01, 31, 30), f(0b1111100, 29, 23), f(0b1, 22), f(0b11000, 21, 17);
+    f(0b0110110, 16, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+
+ #define INSN(NAME, opc)                                                                 \
+   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+     starti;                                                                             \
+diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+index f2f85df60..873da580b 100644
+--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+@@ -2853,6 +2853,124 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp,
+   eor(crc, crc, tmp);
+ }
+ 
+/**
+ * Multiply one pair of double-precision floating-point numbers and add the product to the running sum (sparse)
+ */
+void MacroAssembler::f2j_ddot_s1(Register dx, Register incx,
+                                 Register dy, Register incy) {
+  const FloatRegister tmpx = v2;
+  const FloatRegister tmpy = v3;
+
+  ld1_d(tmpx, 0, Address(dx, incx));
+  ld1_d(tmpy, 0, Address(dy, incy));
+  fmaddd(v0, tmpx, tmpy, v0);
+}
+
+/**
+ * Multiply one pair of double-precision floating-point numbers and add the product to the running sum (dense)
+ */
+void MacroAssembler::f2j_ddot_d1(Register dx, Register dy, int size) {
+  const FloatRegister tmpx = v2;
+  const FloatRegister tmpy = v3;
+
+  ldrd(tmpx, post(dx, size));
+  ldrd(tmpy, post(dy, size));
+  fmaddd(v0, tmpx, tmpy, v0);
+}
+
+/**
+ * Multiply four pairs of double-precision floating-point numbers and accumulate the products
+ */
+void MacroAssembler::f2j_ddot_d4(Register dx, Register dy) {
+  ld1(v2, v3, T2D, post(dx, 32));
+  ld1(v4, v5, T2D, post(dy, 32));
+  fmul(v2, T2D, v2, v4);
+  fmul(v3, T2D, v3, v5);
+  fadd(v0, T2D, v0, v2);
+  fadd(v6, T2D, v6, v3);
+}
+
+/**
+ * @param n         register containing the number of doubles in array
+ * @param dx        register pointing to input array
+ * @param incx      register containing step len for dx
+ * @param dy        register pointing to another input array
+ * @param incy      register containing step len for dy
+ * @param temp_reg  register containing loop variable
+ */
+void MacroAssembler::f2j_ddot(Register n, Register dx, Register incx,
+                              Register dy, Register incy, Register temp_reg) {
+  Label Ldot_EXIT, Ldot_S_BEGIN, Ldot_S1, Ldot_S10, Ldot_S4, Ldot_D_BEGIN,
+        Ldot_D1, Ldot_D10, Ldot_D4;
+
+  const int SZ = 8;
+
+    enter();
+    fmovd(v0, zr);
+    fmovd(v6, v0);
+
+    cmp(n, zr);
+    br(Assembler::LE, Ldot_EXIT);
+
+    cmp(incx, 1);
+    br(Assembler::NE, Ldot_S_BEGIN);
+    cmp(incy, 1);
+    br(Assembler::NE, Ldot_S_BEGIN);
+
+  BIND(Ldot_D_BEGIN);
+    asr(temp_reg, n, 2);
+    cmp(temp_reg, zr);
+    br(Assembler::LE, Ldot_D1);
+
+  BIND(Ldot_D4);
+    f2j_ddot_d4(dx, dy);
+    subs(temp_reg, temp_reg, 1);
+    br(Assembler::NE, Ldot_D4);
+
+    fadd(v0, T2D, v0, v6);
+    faddp_d(v0, v0);
+
+  BIND(Ldot_D1);
+    ands(temp_reg, n, 3);
+    br(Assembler::LE, Ldot_EXIT);
+
+  BIND(Ldot_D10);
+    f2j_ddot_d1(dx, dy, SZ);
+    subs(temp_reg, temp_reg, 1);
+    br(Assembler::NE, Ldot_D10);
+    leave();
+    ret(lr);
+
+  BIND(Ldot_S_BEGIN);
+    lsl(incx, incx, 3);
+    lsl(incy, incy, 3);
+
+    asr(temp_reg, n, 2);
+    cmp(temp_reg, zr);
+    br(Assembler::LE, Ldot_S1);
+
+  BIND(Ldot_S4);
+    f2j_ddot_s1(dx, incx, dy, incy);
+    f2j_ddot_s1(dx, incx, dy, incy);
+    f2j_ddot_s1(dx, incx, dy, incy);
+    f2j_ddot_s1(dx, incx, dy, incy);
+    subs(temp_reg, temp_reg, 1);
+    br(Assembler::NE, Ldot_S4);
+
+  BIND(Ldot_S1);
+    ands(temp_reg, n, 3);
+    br(Assembler::LE, Ldot_EXIT);
+
+  BIND(Ldot_S10);
+    f2j_ddot_s1(dx, incx, dy, incy);
+    subs(temp_reg, temp_reg, 1);
+    br(Assembler::NE, Ldot_S10);
+
+  BIND(Ldot_EXIT);
+    leave();
+    ret(lr);
+}
+
+ /**
+  * @param crc   register containing existing CRC (32-bit)
+  * @param buf   register pointing to input byte buffer (byte*)
+diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+index 388177589..1abc7e3b0 100644
+--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+@@ -1180,6 +1180,9 @@ public:
+         Register table0, Register table1, Register table2, Register table3,
+         bool upper = false);
+ 
+  void f2j_ddot(Register n, Register dx, Register incx,
+                  Register dy, Register incy, Register temp_reg);
+
+   void string_compare(Register str1, Register str2,
+ 		      Register cnt1, Register cnt2, Register result,
+ 		      Register tmp1);
+@@ -1236,6 +1239,11 @@ private:
+   // Uses rscratch2 if the address is not directly reachable
+   Address spill_address(int size, int offset, Register tmp=rscratch2);
+ 
+private:
+  void f2j_ddot_s1(Register dx, Register incx, Register dy, Register incy);
+  void f2j_ddot_d1(Register dx, Register dy, int size);
+  void f2j_ddot_d4(Register dx, Register dy);
+
+ public:
+   void spill(Register Rx, bool is64, int offset) {
+     if (is64) {
+diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+index 0d73c0c0c..337d5c1dd 100644
+--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+@@ -45,6 +45,7 @@
+ 
+ #include "stubRoutines_aarch64.hpp"
+ 
+
+ #ifdef COMPILER2
+ #include "opto/runtime.hpp"
+ #endif
+@@ -3220,6 +3221,39 @@ class StubGenerator: public StubCodeGenerator {
+     return start;
+   }
+ 
+  /**
+   *  Generates the "f2jblas_ddot" stub; the instruction sequence comes from f2j_ddot.
+   *
+   * Inputs:
+   *   c_rarg0   - int n
+   *   c_rarg1   - double[] dx (the C2 intrinsic passes the address of element 0)
+   *   c_rarg2   - int incx
+   *   c_rarg3   - double[] dy (the C2 intrinsic passes the address of element 0)
+   *   c_rarg4   - int incy
+   *
+   * Output:
+   *       d0   - ddot result
+   *
+   */
+  address generate_ddotF2jBLAS() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "f2jblas_ddot");
+
+    address start = __ pc();  // entry point handed back to the caller
+
+    const Register n    = c_rarg0;
+    const Register dx   = c_rarg1;
+    const Register incx = c_rarg2;
+    const Register dy   = c_rarg3;
+    const Register incy = c_rarg4;
+
+    BLOCK_COMMENT("Entry:");
+
+    __ f2j_ddot(n, dx, incx, dy, incy, rscratch2);  // rscratch2 is passed as temp_reg; NOTE(review): no ret emitted here - presumably f2j_ddot returns itself
+
+    return start;
+  }
+
+   /**
+    *  Arguments:
+    *
+@@ -4262,6 +4296,10 @@ class StubGenerator: public StubCodeGenerator {
+       StubRoutines::_montgomerySquare = g.generate_multiply();
+     }
+ 
+    if (UseF2jBLASIntrinsics) {
+      StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS();
+    }
+
+     if (UseAESIntrinsics) {
+       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
+index 148f9212e..6bd8dbedd 100644
+--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
+@@ -852,6 +852,12 @@
+    do_name(     implCompress_name,                                 "implCompress0")                                     \
+    do_signature(implCompress_signature,                            "([BI)V")                                            \
+                                                                                                                         \
+  /* support for com.github.fommil.netlib.F2jBLAS */                                                                    \
+  do_class(com_github_fommil_netlib_f2jblas,                       "com/github/fommil/netlib/F2jBLAS")                  \
+  do_intrinsic(_f2jblas_ddot, com_github_fommil_netlib_f2jblas, ddot_name, ddot_signature, F_R)                         \
+   do_name(     ddot_name,                                         "ddot")                                              \
+   do_signature(ddot_signature,                                    "(I[DI[DI)D")                                        \
+                                                                                                                        \
+   /* support for sun.security.provider.SHA2 */                                                                          \
+   do_class(sun_security_provider_sha2,                             "sun/security/provider/SHA2")                        \
+   do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R)          \
+diff --git a/hotspot/src/share/vm/oops/method.cpp b/hotspot/src/share/vm/oops/method.cpp
+index 24fae4d30..64cdae9c7 100644
+--- a/hotspot/src/share/vm/oops/method.cpp
+++ b/hotspot/src/share/vm/oops/method.cpp
+@@ -1281,7 +1281,9 @@ vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
+   // which does not use the class default class loader so we check for its loader here
+   InstanceKlass* ik = InstanceKlass::cast(holder);
+   if ((ik->class_loader() != NULL) && !SystemDictionary::is_ext_class_loader(ik->class_loader())) {
+-    return vmSymbols::NO_SID;   // regardless of name, no intrinsics here
+    if (!EnableIntrinsicExternal) {
+      return vmSymbols::NO_SID;   // regardless of name, no intrinsics here
+    }
+   }
+ 
+   // see if the klass name is well-known:
+diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
+index 9ef1c5e69..aa1b1ac3a 100644
+--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
+@@ -978,7 +978,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
+                   strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
+                   strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
+                   strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
+-                  strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
+                  strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0)
+                  ))) {
+             call->dump();
+             fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
+diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp
+index 89ebabe6f..5cbc0f012 100644
+--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
+@@ -335,6 +335,7 @@ class LibraryCallKit : public GraphKit {
+   bool inline_mulAdd();
+   bool inline_montgomeryMultiply();
+   bool inline_montgomerySquare();
+  bool inline_ddotF2jBLAS();
+ 
+   bool inline_profileBoolean();
+ };
+@@ -587,6 +588,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
+     if (!UseCRC32Intrinsics) return NULL;
+     break;
+ 
+  case vmIntrinsics::_f2jblas_ddot:
+    if (!UseF2jBLASIntrinsics) return NULL;
+    break;
+
+   case vmIntrinsics::_incrementExactI:
+   case vmIntrinsics::_addExactI:
+     if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
+@@ -983,6 +988,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
+ 
+   case vmIntrinsics::_profileBoolean:
+     return inline_profileBoolean();
+  case vmIntrinsics::_f2jblas_ddot:
+    return inline_ddotF2jBLAS();
+ 
+   default:
+     // If you get here, it may be that someone has added a new intrinsic
+@@ -6303,6 +6310,49 @@ bool LibraryCallKit::inline_updateBytesCRC32() {
+   return true;
+ }
+ 
+/**
+ * Intrinsic for double com.github.fommil.netlib.F2jBLAS.ddot(int n, double[] dx, int incx, double[] dy, int incy)
+ */
+bool LibraryCallKit::inline_ddotF2jBLAS() {
+  assert(callee()->signature()->size() == 5, "ddot has 5 parameters");
+  address stubAddr = StubRoutines::ddotF2jBLAS();
+  if (stubAddr == NULL) return false;   // no stub was generated (address defaults to NULL)
+
+  Node* n    = argument(1);       // type: int
+  Node* dx   = argument(2);       // type: double[]
+  Node* incx = argument(3);       // type: int
+  Node* dy   = argument(4);       // type: double[]
+  Node* incy = argument(5);       // type: int
+
+  const Type* dx_type = dx->Value(&_gvn);
+  const Type* dy_type = dy->Value(&_gvn);
+  const TypeAryPtr* dx_top_src = dx_type->isa_aryptr();
+  const TypeAryPtr* dy_top_src = dy_type->isa_aryptr();
+  if (dx_top_src == NULL || dx_top_src->klass() == NULL ||
+      dy_top_src == NULL || dy_top_src->klass() == NULL) {
+    // failed array check
+    return false;
+  }
+
+  // Both operands must be known to be arrays of double; otherwise decline.
+  BasicType dx_elem = dx_top_src->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType dy_elem = dy_top_src->klass()->as_array_klass()->element_type()->basic_type();
+  if (dx_elem != T_DOUBLE || dy_elem != T_DOUBLE) {
+    return false;
+  }
+
+  // Element-0 addresses handed to the stub. NOTE(review): dx/dy are not null-checked here - confirm
+  Node* dx_start = array_element_address(dx, intcon(0), dx_elem);
+  Node* dy_start = array_element_address(dy, intcon(0), dy_elem);
+
+  Node* call = make_runtime_call(RC_LEAF, OptoRuntime::ddotF2jBLAS_Type(),
+                                 stubAddr, "f2jblas_ddot", TypePtr::BOTTOM,  // name is matched in escape.cpp
+                                 n, dx_start, incx, dy_start, incy);
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
+ /**
+  * Calculate CRC32 for ByteBuffer.
+  * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp
+index ba8f42e49..f1fe4d666 100644
+--- a/hotspot/src/share/vm/opto/runtime.cpp
+++ b/hotspot/src/share/vm/opto/runtime.cpp
+@@ -920,6 +920,30 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
+   return TypeFunc::make(domain, range);
+ }
+ 
+/**
+ * Signature of the ddot stub call: double ddot(int n, double *dx, int incx, double *dy, int incy)
+ */
+const TypeFunc* OptoRuntime::ddotF2jBLAS_Type() {
+  // domain: one field per argument, filled in call order starting at TypeFunc::Parms
+  int num_args = 5;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypeInt::INT;        // n
+  fields[argp++] = TypeAryPtr::DOUBLES;    // dx
+  fields[argp++] = TypeInt::INT;        // incx
+  fields[argp++] = TypeAryPtr::DOUBLES;    // dy
+  fields[argp++] = TypeInt::INT;        // incy
+  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
+
+  // range: a single Type::DOUBLE field. NOTE(review): confirm no Type::HALF pad slot is expected
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms + 0] = Type::DOUBLE;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
+  return TypeFunc::make(domain, range);
+}
+
+ // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
+ const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
+   // create input type (domain)
+diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp
+index e3bdfdf9c..66d393c5c 100644
+--- a/hotspot/src/share/vm/opto/runtime.hpp
+++ b/hotspot/src/share/vm/opto/runtime.hpp
+@@ -317,6 +317,8 @@ private:
+ 
+   static const TypeFunc* updateBytesCRC32_Type();
+ 
+  static const TypeFunc* ddotF2jBLAS_Type();
+
+   // leaf on stack replacement interpreter accessor types
+   static const TypeFunc* osr_end_Type();
+ 
+diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp
+index 7b17e623b..520cc3187 100644
+--- a/hotspot/src/share/vm/runtime/globals.hpp
+++ b/hotspot/src/share/vm/runtime/globals.hpp
+@@ -743,6 +743,12 @@ class CommandLineFlags {
+   product(bool, UseCRC32Intrinsics, false,                                  \
+           "use intrinsics for java.util.zip.CRC32")                         \
+                                                                             \
+  experimental(bool, UseF2jBLASIntrinsics, false,                           \
+          "use intrinsics for com.github.fommil.netlib.F2jBLAS on aarch64") \
+                                                                            \
+  experimental(bool, EnableIntrinsicExternal, false,                        \
+          "enable intrinsics for methods of external packages")             \
+                                                                            \
+   develop(bool, TraceCallFixup, false,                                      \
+           "Trace all call fixups")                                          \
+                                                                             \
+diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp
+index d943248da..10f438bc5 100644
+--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp
+@@ -136,6 +136,8 @@ address StubRoutines::_sha512_implCompressMB = NULL;
+ address StubRoutines::_updateBytesCRC32 = NULL;
+ address StubRoutines::_crc_table_adr = NULL;
+ 
+address StubRoutines::_ddotF2jBLAS = NULL;
+
+ address StubRoutines::_multiplyToLen = NULL;
+ address StubRoutines::_squareToLen = NULL;
+ address StubRoutines::_mulAdd = NULL;
+diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp
+index e18b9127d..a4eeb910d 100644
+--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp
+@@ -214,6 +214,8 @@ class StubRoutines: AllStatic {
+   static address _updateBytesCRC32;
+   static address _crc_table_adr;
+ 
+  static address _ddotF2jBLAS;
+
+   static address _multiplyToLen;
+   static address _squareToLen;
+   static address _mulAdd;
+@@ -377,6 +379,8 @@ class StubRoutines: AllStatic {
+   static address updateBytesCRC32()    { return _updateBytesCRC32; }
+   static address crc_table_addr()      { return _crc_table_adr; }
+ 
+  static address ddotF2jBLAS()         { return _ddotF2jBLAS; }
+
+   static address multiplyToLen()       {return _multiplyToLen; }
+   static address squareToLen()         {return _squareToLen; }
+   static address mulAdd()              {return _mulAdd; }
--- a/fast-serializer-jdk8.patch
+++ b/fast-serializer-jdk8.patch
@ -1,679 +0,0 @@
-commit 3ece3b6a87e4bf61a1f786c12d796012becce313
-Author: hexuejin <hexuejin2@huawei.com>
-Date:   Thu May 28 10:30:20 2020 +0800
-
-    Add FastSerializer
-    
-    DTS/AR: AR.SR.IREQ02369011.001.001
-    Summary:<core-libs>:  Add FastSerializer
-    LLT: jtreg
-    Patch Type: huawei
-    Bug url: NA
-
-diff --git a/hotspot/src/share/vm/prims/unsafe.cpp b/hotspot/src/share/vm/prims/unsafe.cpp
-index cdb72c0d5..d50041635 100644
--- a/hotspot/src/share/vm/prims/unsafe.cpp
-+++ b/hotspot/src/share/vm/prims/unsafe.cpp
-@@ -1361,6 +1361,10 @@ UNSAFE_ENTRY(void, Unsafe_PrefetchWrite(JNIEnv* env, jclass ignored, jobject obj
-   Prefetch::write(addr, (intx)offset);
- UNSAFE_END
- 
-+UNSAFE_ENTRY(jboolean, Unsafe_GetUseFastSerializer(JNIEnv *env, jobject unsafe)) {
-+  return UseFastSerializer;
-+}
-+UNSAFE_END
- 
- /// JVM_RegisterUnsafeMethods
- 
-@@ -1447,7 +1451,8 @@ static JNINativeMethod methods_140[] = {
-     {CC "allocateInstance",   CC "(" CLS ")" OBJ,             FN_PTR(Unsafe_AllocateInstance)},
-     {CC "monitorEnter",       CC "(" OBJ ")V",               FN_PTR(Unsafe_MonitorEnter)},
-     {CC "monitorExit",        CC "(" OBJ ")V",               FN_PTR(Unsafe_MonitorExit)},
-    {CC "throwException",     CC "(" THR ")V",               FN_PTR(Unsafe_ThrowException)}
-+    {CC "throwException",     CC "(" THR ")V",               FN_PTR(Unsafe_ThrowException)},
-+    {CC "getUseFastSerializer",   CC "()Z",              FN_PTR(Unsafe_GetUseFastSerializer)}
- };
- 
- // These are the methods prior to the JSR 166 changes in 1.5.0
-@@ -1493,8 +1498,8 @@ static JNINativeMethod methods_141[] = {
-     {CC "allocateInstance",   CC "(" CLS ")" OBJ,             FN_PTR(Unsafe_AllocateInstance)},
-     {CC "monitorEnter",       CC "(" OBJ ")V",               FN_PTR(Unsafe_MonitorEnter)},
-     {CC "monitorExit",        CC "(" OBJ ")V",               FN_PTR(Unsafe_MonitorExit)},
-    {CC "throwException",     CC "(" THR ")V",               FN_PTR(Unsafe_ThrowException)}
-
-+    {CC "throwException",     CC "(" THR ")V",               FN_PTR(Unsafe_ThrowException)},
-+    {CC "getUseFastSerializer",   CC "()Z",              FN_PTR(Unsafe_GetUseFastSerializer)}
- };
- 
- // These are the methods prior to the JSR 166 changes in 1.6.0
-@@ -1548,7 +1553,8 @@ static JNINativeMethod methods_15[] = {
-     {CC "compareAndSwapInt",  CC "(" OBJ "J""I""I"")Z",      FN_PTR(Unsafe_CompareAndSwapInt)},
-     {CC "compareAndSwapLong", CC "(" OBJ "J""J""J"")Z",      FN_PTR(Unsafe_CompareAndSwapLong)},
-     {CC "park",               CC "(ZJ)V",                  FN_PTR(Unsafe_Park)},
-    {CC "unpark",             CC "(" OBJ ")V",               FN_PTR(Unsafe_Unpark)}
-+    {CC "unpark",             CC "(" OBJ ")V",               FN_PTR(Unsafe_Unpark)},
-+    {CC "getUseFastSerializer",   CC "()Z",              FN_PTR(Unsafe_GetUseFastSerializer)}
- 
- };
- 
-@@ -1606,7 +1612,8 @@ static JNINativeMethod methods_16[] = {
-     {CC "putOrderedInt",      CC "(" OBJ "JI)V",             FN_PTR(Unsafe_SetOrderedInt)},
-     {CC "putOrderedLong",     CC "(" OBJ "JJ)V",             FN_PTR(Unsafe_SetOrderedLong)},
-     {CC "park",               CC "(ZJ)V",                  FN_PTR(Unsafe_Park)},
-    {CC "unpark",             CC "(" OBJ ")V",               FN_PTR(Unsafe_Unpark)}
-+    {CC "unpark",             CC "(" OBJ ")V",               FN_PTR(Unsafe_Unpark)},
-+    {CC "getUseFastSerializer",   CC "()Z",              FN_PTR(Unsafe_GetUseFastSerializer)}
- };
- 
- // These are the methods for 1.8.0
-@@ -1662,7 +1669,8 @@ static JNINativeMethod methods_18[] = {
-     {CC "putOrderedInt",      CC "(" OBJ "JI)V",             FN_PTR(Unsafe_SetOrderedInt)},
-     {CC "putOrderedLong",     CC "(" OBJ "JJ)V",             FN_PTR(Unsafe_SetOrderedLong)},
-     {CC "park",               CC "(ZJ)V",                  FN_PTR(Unsafe_Park)},
-    {CC "unpark",             CC "(" OBJ ")V",               FN_PTR(Unsafe_Unpark)}
-+    {CC "unpark",             CC "(" OBJ ")V",               FN_PTR(Unsafe_Unpark)},
-+    {CC "getUseFastSerializer",   CC "()Z",              FN_PTR(Unsafe_GetUseFastSerializer)}
- };
- 
- JNINativeMethod loadavg_method[] = {
-diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp
-index 2e6ff26ed..0a6ebfae1 100644
--- a/hotspot/src/share/vm/runtime/globals.hpp
-+++ b/hotspot/src/share/vm/runtime/globals.hpp
-@@ -553,6 +553,10 @@ class CommandLineFlags {
-           "Enable normal processing of flags relating to experimental "     \
-           "features")                                                       \
-                                                                             \
-+  experimental(bool, UseFastSerializer, false,                              \
-+          "Cache-based serialization.It is extremely fast, but it can only" \
-+          "be effective in certain scenarios.")                             \
-+                                                                            \
-   product(bool, JavaMonitorsInStackTrace, true,                             \
-           "Print information about Java monitor locks when the stacks are"  \
-           "dumped")                                                         \
-diff --git a/jdk/src/share/classes/java/io/ObjectInputStream.java b/jdk/src/share/classes/java/io/ObjectInputStream.java
-index 5d30f2a01..b67f01719 100644
--- a/jdk/src/share/classes/java/io/ObjectInputStream.java
-+++ b/jdk/src/share/classes/java/io/ObjectInputStream.java
-@@ -49,6 +49,7 @@ import sun.misc.SharedSecrets;
- import sun.misc.JavaOISAccess;
- import sun.util.logging.PlatformLogger;
- import sun.security.action.GetBooleanAction;
-+import sun.misc.Unsafe;
- 
- /**
-  * An ObjectInputStream deserializes primitive data and objects previously
-@@ -284,6 +285,22 @@ public class ObjectInputStream
-             traceLogger = (filterLog != null &&
-                 filterLog.isLoggable(PlatformLogger.Level.FINER)) ? filterLog : null;
-         }
-+
-+        /*
-+         * Logger for FastSerializer.
-+         * Setup the FastSerializer logger if it is set to FINE
-+         * (Assuming it will not change).
-+         */
-+        private static final PlatformLogger fastSerLogger;
-+        static {
-+            if (printFastSerializer) {
-+                PlatformLogger fastSerLog = PlatformLogger.getLogger("fastSerializer");
-+                fastSerLogger = (fastSerLog != null &&
-+                    fastSerLog.isLoggable(PlatformLogger.Level.FINE)) ? fastSerLog : null;
-+            } else {
-+                fastSerLogger = null;
-+            }
-+        }
-     }
- 
-     /** filter stream for handling block data conversion */
-@@ -312,6 +329,9 @@ public class ObjectInputStream
-     /** if true, invoke resolveObject() */
-     private boolean enableResolve;
- 
-+    /** Used to get the commandline option: useFastSerializer */
-+    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
-+
-     /**
-      * Context during upcalls to class-defined readObject methods; holds
-      * object currently being deserialized and descriptor for current class.
-@@ -325,6 +345,33 @@ public class ObjectInputStream
-      */
-     private ObjectInputFilter serialFilter;
- 
-+    /**
-+     * value of "useFastSerializer" property
-+     */
-+    private static final boolean defaultFastSerializer = UNSAFE.getUseFastSerializer();
-+
-+    /**
-+     *  true or false for open FastSerilizer
-+     *  May be changed in readStreamHeader
-+     */
-+    private boolean useFastSerializer = defaultFastSerializer;
-+
-+    /**
-+     * Value of "fastSerializerEscapeMode" property. It can be turned on
-+     * when useFastSerializer is true.
-+     */
-+    private static final boolean fastSerializerEscapeMode = java.security.AccessController.doPrivileged(
-+            new sun.security.action.GetBooleanAction(
-+                    "fastSerializerEscapeMode")).booleanValue();
-+
-+    /**
-+     * value of  "printFastSerializer" property,
-+     * as true or false for printing FastSerializer logs.
-+     */
-+    private static final boolean printFastSerializer = java.security.AccessController.doPrivileged(
-+            new sun.security.action.GetBooleanAction(
-+                    "printFastSerializer")).booleanValue();
-+
-     /**
-      * Creates an ObjectInputStream that reads from the specified InputStream.
-      * A serialization stream header is read from the stream and verified.
-@@ -396,6 +443,9 @@ public class ObjectInputStream
-      * transitively so that a complete equivalent graph of objects is
-      * reconstructed by readObject.
-      *
-+     * The difference between fastSerialzation and default serialization is the
-+     * descriptor serialization. The data serialization is same with each other.
-+     *
-      * <p>The root object is completely restored when all of its fields and the
-      * objects it references are completely restored.  At this point the object
-      * validation callbacks are executed in order based on their registered
-@@ -670,11 +720,20 @@ public class ObjectInputStream
-         vlist.register(obj, prio);
-     }
- 
-+    /**
-+     * Cache the class meta during serialization.
-+     * Only used in FastSerilizer.
-+     */
-+    protected static ConcurrentHashMap<String,Class<?>> nameToClass = new ConcurrentHashMap<>();
-+
-     /**
-      * Load the local class equivalent of the specified stream class
-      * description.  Subclasses may implement this method to allow classes to
-      * be fetched from an alternate source.
-      *
-+     * When fastSerializer is turned on, fields of desc will be null except
-+     * name. When resolveClass is override, this may cause null pointer exception.
-+     *
-      * <p>The corresponding method in <code>ObjectOutputStream</code> is
-      * <code>annotateClass</code>.  This method will be invoked only once for
-      * each unique class in the stream.  This method can be implemented by
-@@ -715,16 +774,27 @@ public class ObjectInputStream
-         throws IOException, ClassNotFoundException
-     {
-         String name = desc.getName();
-+        Class<?> cl = null;
-+
-+        if (useFastSerializer) {
-+            cl = nameToClass.get(name);
-+             if (cl != null) {
-+                 return cl;
-+             }
-+        }
-         try {
-            return Class.forName(name, false, latestUserDefinedLoader());
-+            cl = Class.forName(name, false, latestUserDefinedLoader());
-         } catch (ClassNotFoundException ex) {
-            Class<?> cl = primClasses.get(name);
-            if (cl != null) {
-                return cl;
-            } else {
-+            cl = primClasses.get(name);
-+            if (cl == null) {
-                 throw ex;
-             }
-         }
-+        if (useFastSerializer) {
-+            nameToClass.put(name, cl);
-+        }
-+
-+        return cl;
-     }
- 
-     /**
-@@ -894,9 +964,34 @@ public class ObjectInputStream
-     {
-         short s0 = bin.readShort();
-         short s1 = bin.readShort();
-        if (s0 != STREAM_MAGIC || s1 != STREAM_VERSION) {
-            throw new StreamCorruptedException(
-                String.format("invalid stream header: %04X%04X", s0, s1));
-+        if (useFastSerializer) {
-+            if (s0 != STREAM_MAGIC_FAST || s1 != STREAM_VERSION) {
-+
-+                if (s0 != STREAM_MAGIC) {
-+                    throw new StreamCorruptedException(
-+                            String.format("invalid stream header: %04X%04X, and FastSerializer is activated", s0, s1));
-+                }
-+
-+                if (!fastSerializerEscapeMode) {
-+                    throw new StreamCorruptedException(
-+                            String.format("invalid stream header: %04X%04X.Fast serialization does not support " +
-+                                          "original serialized files", s0, s1));
-+                }
-+
-+                // Escape to default serialization
-+                useFastSerializer = false;
-+                if (Logging.fastSerLogger != null) {
-+                    Logging.fastSerLogger.fine("[Deserialize]: Escape and disable FastSerializer");
-+                }
-+            }
-+        } else if (s0 != STREAM_MAGIC || s1 != STREAM_VERSION) {
-+            if (s0 == STREAM_MAGIC_FAST && s1 == STREAM_VERSION) {
-+                throw new StreamCorruptedException(
-+                        String.format("invalid stream header: %04X%04X, and it is a FastSerializer stream", s0, s1));
-+            } else {
-+                throw new StreamCorruptedException(
-+                        String.format("invalid stream header: %04X%04X", s0, s1));
-+            }
-         }
-     }
- 
-@@ -910,6 +1005,11 @@ public class ObjectInputStream
-      * this method reads class descriptors according to the format defined in
-      * the Object Serialization specification.
-      *
-+     * In fastSerialize mode, the descriptor is obtained by lookup method. And
-+     * the resolveClass method is called here to get the classmeta. Since the
-+     * descriptor is obtained by lookup, the descriptor is same as localdesc.
-+     * So we cann't distinguish the receiver desc and local desc.
-+     *
-      * @return  the class descriptor read
-      * @throws  IOException If an I/O error has occurred.
-      * @throws  ClassNotFoundException If the Class of a serialized object used
-@@ -920,6 +1020,27 @@ public class ObjectInputStream
-     protected ObjectStreamClass readClassDescriptor()
-         throws IOException, ClassNotFoundException
-     {
-+        // fastSerializer
-+        if (useFastSerializer) {
-+            String name = readUTF();
-+            Class<?> cl = null;
-+            ObjectStreamClass desc = new ObjectStreamClass(name);
-+            try {
-+                // In order to match this method, we add an annotateClass method in
-+                // writeClassDescriptor.
-+                cl = resolveClass(desc);
-+            } catch (ClassNotFoundException ex) {
-+                // resolveClass is just used to obtain Class which required by lookup method
-+                // and it will be called again later, so we don't throw ClassNotFoundException here.
-+                return desc;
-+            }
-+            if (cl != null) {
-+                desc = ObjectStreamClass.lookup(cl, true);
-+            }
-+            return desc;
-+        }
-+
-+        // Default deserialization. If the Class cannot be found, throw ClassNotFoundException.
-         ObjectStreamClass desc = new ObjectStreamClass();
-         desc.readNonProxy(this);
-         return desc;
-@@ -1935,17 +2056,40 @@ public class ObjectInputStream
- 
-         skipCustomData();
- 
-        try {
-            totalObjectRefs++;
-            depth++;
-            desc.initNonProxy(readDesc, cl, resolveEx, readClassDesc(false));
-        } finally {
-            depth--;
-+        totalObjectRefs++;
-+        depth++;
-+
-+        if (useFastSerializer) {
-+            desc.initNonProxyFast(readDesc, resolveEx);
-+            ObjectStreamClass superDesc = desc.getSuperDesc();
-+            long originDepth = depth - 1;
-+            // Since desc is obtained from the lookup method, we will lose the depth and
-+            // totalObjectRefs of superDesc. So we add a loop here to compute the depth
-+            // and objectRef of superDesc.
-+            while (superDesc != null && superDesc.forClass() != null) {
-+                filterCheck(superDesc.forClass(), -1);
-+                superDesc = superDesc.getSuperDesc();
-+                totalObjectRefs++;
-+                depth++;
-+            }
-+            depth = originDepth;
-+        } else {
-+            try {
-+                desc.initNonProxy(readDesc, cl, resolveEx, readClassDesc(false));
-+            } finally {
-+                depth--;
-+            }
-         }
- 
-         handles.finish(descHandle);
-         passHandle = descHandle;
- 
-+        if (Logging.fastSerLogger != null) {
-+            Logging.fastSerLogger.fine(
-+                    "[Deserialize] useFastSerializer:{0}, Class name:{1}, SerialVersionUID:{2}, flags:{3}",
-+                    useFastSerializer, desc.getName(), desc.getSerialVersionUID(), desc.getFlags(this));
-+        }
-+
-         return desc;
-     }
- 
-@@ -2334,21 +2478,25 @@ public class ObjectInputStream
-             desc.setPrimFieldValues(obj, primVals);
-         }
- 
-        int objHandle = passHandle;
-        ObjectStreamField[] fields = desc.getFields(false);
-        Object[] objVals = new Object[desc.getNumObjFields()];
-        int numPrimFields = fields.length - objVals.length;
-        for (int i = 0; i < objVals.length; i++) {
-            ObjectStreamField f = fields[numPrimFields + i];
-            objVals[i] = readObject0(Object.class, f.isUnshared());
-            if (f.getField() != null) {
-                handles.markDependency(objHandle, passHandle);
-+        Object[] objVals = null;
-+        int numObjFields = desc.getNumObjFields();
-+        if (numObjFields > 0) {
-+            int objHandle = passHandle;
-+            ObjectStreamField[] fields = desc.getFields(false);
-+            objVals = new Object[numObjFields];
-+            int numPrimFields = fields.length - objVals.length;
-+            for (int i = 0; i < objVals.length; i++) {
-+                ObjectStreamField f = fields[numPrimFields + i];
-+                objVals[i] = readObject0(Object.class, f.isUnshared());
-+                if (f.getField() != null) {
-+                    handles.markDependency(objHandle, passHandle);
-+                }
-             }
-+            if (obj != null) {
-+                desc.setObjFieldValues(obj, objVals);
-+            }
-+            passHandle = objHandle;
-         }
-        if (obj != null) {
-            desc.setObjFieldValues(obj, objVals);
-        }
-        passHandle = objHandle;
-     }
- 
-     /**
-diff --git a/jdk/src/share/classes/java/io/ObjectOutputStream.java b/jdk/src/share/classes/java/io/ObjectOutputStream.java
-index 6d29e3a1f..3890efc3e 100644
--- a/jdk/src/share/classes/java/io/ObjectOutputStream.java
-+++ b/jdk/src/share/classes/java/io/ObjectOutputStream.java
-@@ -37,6 +37,8 @@ import java.util.concurrent.ConcurrentMap;
- import static java.io.ObjectStreamClass.processQueue;
- import java.io.SerialCallbackContext;
- import sun.reflect.misc.ReflectUtil;
-+import sun.misc.Unsafe;
-+import sun.util.logging.PlatformLogger;
- 
- /**
-  * An ObjectOutputStream writes primitive data types and graphs of Java objects
-@@ -173,6 +175,24 @@ public class ObjectOutputStream
-             new ReferenceQueue<>();
-     }
- 
-+    private static class Logging {
-+        /*
-+         * Logger for FastSerializer.
-+         * Setup the FastSerializer logger if it is set to FINE.
-+         * (Assuming it will not change).
-+         */
-+        static final PlatformLogger fastSerLogger;
-+        static {
-+            if (printFastSerializer) {
-+                PlatformLogger fastSerLog = PlatformLogger.getLogger("fastSerializer");
-+                fastSerLogger = (fastSerLog != null &&
-+                        fastSerLog.isLoggable(PlatformLogger.Level.FINE)) ? fastSerLog : null;
-+            } else {
-+                fastSerLogger = null;
-+            }
-+        }
-+    }
-+
-     /** filter stream for handling block data conversion */
-     private final BlockDataOutputStream bout;
-     /** obj -> wire handle map */
-@@ -214,6 +234,22 @@ public class ObjectOutputStream
-             new sun.security.action.GetBooleanAction(
-                 "sun.io.serialization.extendedDebugInfo")).booleanValue();
- 
-+    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
-+
-+    /**
-+     * Value of "UseFastSerializer" property. The fastSerializer is turned
-+     * on when it is true.
-+     */
-+    private static final boolean useFastSerializer = UNSAFE.getUseFastSerializer();
-+
-+    /**
-+     * value of  "printFastSerializer" property,
-+     * as true or false for printing FastSerializer logs.
-+     */
-+    private static final boolean printFastSerializer = java.security.AccessController.doPrivileged(
-+            new sun.security.action.GetBooleanAction(
-+                    "printFastSerializer")).booleanValue();
-+
-     /**
-      * Creates an ObjectOutputStream that writes to the specified OutputStream.
-      * This constructor writes the serialization stream header to the
-@@ -327,6 +363,9 @@ public class ObjectOutputStream
-      * object are written transitively so that a complete equivalent graph of
-      * objects can be reconstructed by an ObjectInputStream.
-      *
-+     * The difference between fastSerialzation and default serialization is the
-+     * descriptor serialization. The data serialization is same with each other.
-+     *
-      * <p>Exceptions are thrown for problems with the OutputStream and for
-      * classes that should not be serialized.  All exceptions are fatal to the
-      * OutputStream, which is left in an indeterminate state, and it is up to
-@@ -633,7 +672,11 @@ public class ObjectOutputStream
-      *          stream
-      */
-     protected void writeStreamHeader() throws IOException {
-        bout.writeShort(STREAM_MAGIC);
-+        if (useFastSerializer) {
-+            bout.writeShort(STREAM_MAGIC_FAST);
-+        } else {
-+            bout.writeShort(STREAM_MAGIC);
-+        }
-         bout.writeShort(STREAM_VERSION);
-     }
- 
-@@ -648,6 +691,9 @@ public class ObjectOutputStream
-      * By default, this method writes class descriptors according to the format
-      * defined in the Object Serialization specification.
-      *
-+     * In fastSerializer mode, we will only write the classname to the stream.
-+     * The annotateClass is used to match the resolveClass in readClassDescriptor.
-+     *
-      * <p>Note that this method will only be called if the ObjectOutputStream
-      * is not using the old serialization stream format (set by calling
-      * ObjectOutputStream's <code>useProtocolVersion</code> method).  If this
-@@ -665,7 +711,14 @@ public class ObjectOutputStream
-     protected void writeClassDescriptor(ObjectStreamClass desc)
-         throws IOException
-     {
-        desc.writeNonProxy(this);
-+        if (useFastSerializer) {
-+            writeUTF(desc.getName());
-+            // The annotateClass is used to match the resolveClass called in
-+            // readClassDescriptor.
-+            annotateClass(desc.forClass());
-+        } else {
-+            desc.writeNonProxy(this);
-+        }
-     }
- 
-     /**
-@@ -1275,9 +1328,21 @@ public class ObjectOutputStream
-         bout.writeByte(TC_CLASSDESC);
-         handles.assign(unshared ? null : desc);
- 
-+        if (Logging.fastSerLogger != null) {
-+            Logging.fastSerLogger.fine(
-+            "[Serialize]   useFastSerializer:{0}, Class name:{1}, SerialVersionUID:{2}, flags:{3}, protocol:{4}",
-+            useFastSerializer, desc.getName(), desc.getSerialVersionUID(), desc.getFlags(this), protocol);
-+        }
-+
-         if (protocol == PROTOCOL_VERSION_1) {
-             // do not invoke class descriptor write hook with old protocol
-            desc.writeNonProxy(this);
-+            if (useFastSerializer) {
-+                // only write name and annotate class when using FastSerializer
-+                writeUTF(desc.getName());
-+                annotateClass(desc.forClass());
-+            } else {
-+                desc.writeNonProxy(this);
-+            }
-         } else {
-             writeClassDescriptor(desc);
-         }
-@@ -1291,7 +1356,9 @@ public class ObjectOutputStream
-         bout.setBlockDataMode(false);
-         bout.writeByte(TC_ENDBLOCKDATA);
- 
-        writeClassDesc(desc.getSuperDesc(), false);
-+        if (!useFastSerializer) {
-+            writeClassDesc(desc.getSuperDesc(), false);
-+        }
-     }
- 
-     /**
-diff --git a/jdk/src/share/classes/java/io/ObjectStreamClass.java b/jdk/src/share/classes/java/io/ObjectStreamClass.java
-index 64453b25a..fce3c3475 100644
--- a/jdk/src/share/classes/java/io/ObjectStreamClass.java
-+++ b/jdk/src/share/classes/java/io/ObjectStreamClass.java
-@@ -280,6 +280,40 @@ public class ObjectStreamClass implements Serializable {
-         return suid.longValue();
-     }
- 
-+    /**
-+     * Return the flags for this class described by this descriptor. The flags
-+     * means a set of bit masks for ObjectStreamClass, which indicate the status
-+     * of SC_WRITE_METHOD, SC_SERIALIZABLE, SC_EXTERNALIZABLE, SC_BLOCK_DATA and
-+     * SC_ENUM.
-+     *
-+     * @param serialStream ObjectOutputStream or ObjectInputStream
-+     *
-+     * @return the flags for this class described by this descriptor
-+     */
-+    public byte getFlags(Object serialStream) {
-+        byte flags = 0;
-+        if (externalizable) {
-+            flags |= ObjectStreamConstants.SC_EXTERNALIZABLE;
-+            if (serialStream instanceof ObjectOutputStream) {
-+                int protocol = ((ObjectOutputStream)serialStream).getProtocolVersion();
-+                if (protocol != ObjectStreamConstants.PROTOCOL_VERSION_1) {
-+                    flags |= ObjectStreamConstants.SC_BLOCK_DATA;
-+                }
-+            } else if (serialStream instanceof ObjectInputStream) {
-+                flags |= ObjectStreamConstants.SC_BLOCK_DATA;
-+            }
-+        } else if (serializable) {
-+            flags |= ObjectStreamConstants.SC_SERIALIZABLE;
-+        }
-+        if (hasWriteObjectData) {
-+            flags |= ObjectStreamConstants.SC_WRITE_METHOD;
-+        }
-+        if (isEnum) {
-+            flags |= ObjectStreamConstants.SC_ENUM;
-+        }
-+        return flags;
-+    }
-+
-     /**
-      * Return the class in the local VM that this version is mapped to.  Null
-      * is returned if there is no corresponding local class.
-@@ -570,6 +604,15 @@ public class ObjectStreamClass implements Serializable {
-     ObjectStreamClass() {
-     }
- 
-+    /**
-+     * Create a blank class descriptor with name. It is only used
-+     * in fastSerialize path.
-+     * @param name  class name
-+     */
-+    ObjectStreamClass(String name) {
-+        this.name  = name;
-+    }
-+
-     /**
-      * Creates a PermissionDomain that grants no permission.
-      */
-@@ -756,6 +799,44 @@ public class ObjectStreamClass implements Serializable {
-         initialized = true;
-     }
- 
-+    /**
-+     * Initializes class descriptor representing a non-proxy class.
-+     * Used in fast serialization mode.
-+     */
-+    void initNonProxyFast(ObjectStreamClass model,
-+                          ClassNotFoundException resolveEx)
-+    {
-+        this.cl = model.cl;
-+        this.resolveEx = resolveEx;
-+        this.superDesc = model.superDesc;
-+        name = model.name;
-+        this.suid = model.suid;
-+        isProxy = false;
-+        isEnum = model.isEnum;
-+        serializable = model.serializable;
-+        externalizable = model.externalizable;
-+        hasBlockExternalData = model.hasBlockExternalData;
-+        hasWriteObjectData = model.hasWriteObjectData;
-+        fields = model.fields;
-+        primDataSize = model.primDataSize;
-+        numObjFields = model.numObjFields;
-+
-+        writeObjectMethod = model.writeObjectMethod;
-+        readObjectMethod = model.readObjectMethod;
-+        readObjectNoDataMethod = model.readObjectNoDataMethod;
-+        writeReplaceMethod = model.writeReplaceMethod;
-+        readResolveMethod = model.readResolveMethod;
-+        if (deserializeEx == null) {
-+            deserializeEx = model.deserializeEx;
-+        }
-+        domains = model.domains;
-+        cons = model.cons;
-+        fieldRefl = model.fieldRefl;
-+        localDesc = model;
-+
-+        initialized = true;
-+    }
-+
-     /**
-      * Reads non-proxy class descriptor information from given input stream.
-      * The resulting class descriptor is not fully functional; it can only be
-diff --git a/jdk/src/share/classes/java/io/ObjectStreamConstants.java b/jdk/src/share/classes/java/io/ObjectStreamConstants.java
-index 23f72b436..59179a6ec 100644
--- a/jdk/src/share/classes/java/io/ObjectStreamConstants.java
-+++ b/jdk/src/share/classes/java/io/ObjectStreamConstants.java
-@@ -38,6 +38,11 @@ public interface ObjectStreamConstants {
-      */
-     final static short STREAM_MAGIC = (short)0xaced;
- 
-+    /**
-+     * Magic number that is written to the stream header when using fastserilizer.
-+     */
-+    static final short STREAM_MAGIC_FAST = (short)0xdeca;
-+
-     /**
-      * Version number that is written to the stream header.
-      */
-diff --git a/jdk/src/share/classes/sun/misc/Unsafe.java b/jdk/src/share/classes/sun/misc/Unsafe.java
-index 99e465802..92fb01669 100644
--- a/jdk/src/share/classes/sun/misc/Unsafe.java
-+++ b/jdk/src/share/classes/sun/misc/Unsafe.java
-@@ -433,6 +433,8 @@ public final class Unsafe {
-     /** @see #putByte(long, byte) */
-     public native void    putDouble(long address, double x);
- 
-+    public native boolean getUseFastSerializer();
-+
-     /**
-      * Fetches a native pointer from a given memory address.  If the address is
-      * zero, or does not point into a block obtained from {@link
--- a/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
+++ b/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
@ -0,0 +1,46 @@
+diff --git a/jdk/src/share/classes/java/lang/Long.java b/jdk/src/share/classes/java/lang/Long.java
+index 58c2cc3ba..7b6e14a97 100644
+--- a/jdk/src/share/classes/java/lang/Long.java
+++ b/jdk/src/share/classes/java/lang/Long.java
+@@ -812,12 +812,11 @@ public final class Long extends Number implements Comparable<Long> {
+         static final Long cache[];
+ 
+         static {
+-
+            int h = 127;
+             String longCacheHighPropValue =
+                 sun.misc.VM.getSavedProperty("java.lang.Long.LongCache.high");
+             if (longCacheHighPropValue != null) {
+                 // high value may be configured by property
+-                int h = 0;
+                 try {
+                     int i = Integer.parseInt(longCacheHighPropValue);
+                     i = Math.max(i, 127);
+@@ -826,21 +825,13 @@ public final class Long extends Number implements Comparable<Long> {
+                 } catch( NumberFormatException nfe) {
+                     // If the property cannot be parsed into an int, ignore it.
+                 }
+-                high = h;
+-                low = -h - 1;
+-                cache = new Long[(high - low) + 1];
+-                int j = low;
+-                for(int k = 0; k < cache.length; k++)
+-                    cache[k] = new Long(j++);
+-
+-            } else {
+-                low = -128;
+-                high = 127;
+-                cache = new Long[(high - low) + 1];
+-                int j = low;
+-                for(int k = 0; k < cache.length; k++)
+-                    cache[k] = new Long(j++);
+             }
+            high = h;
+            low = -h - 1;
+            cache = new Long[(high - low) + 1];
+            int j = low;
+            for(int k = 0; k < cache.length; k++)
+               cache[k] = new Long(j++);
+         }
+     }
+ 
--- a/java-1.8.0-openjdk.spec
+++ b/java-1.8.0-openjdk.spec
@ -915,7 +915,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r

 Name:    java-%{javaver}-%{origin}
 Version: %{javaver}.%{updatever}.%{buildver}
-Release: 0
+Release: 2
 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons
 # and this change was brought into RHEL-4. java-1.5.0-ibm packages
 # also included the epoch in their virtual provides. This created a
@ -1031,12 +1031,18 @@ Patch89: 8144993-Elide-redundant-memory-barrier-after-AllocationNode.patch
 Patch90: 8223504-improve-performance-of-forall-loops-by-better.patch
 Patch91: add-vm-option-BoxTypeCachedMax-for-Integer-and-Long-cache.patch
 Patch92: 8080289-8040213-8189067-move-the-store-out-of-the-loop.patch
-Patch93: fast-serializer-jdk8.patch
 Patch94: 8182397-race-in-field-updates.patch
 Patch95: 8205921-Optimizing-best-of-2-work-stealing-queue-selection.patch

 # 8u265
 Patch96: fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch
+Patch97: leaf-optimize-in-ParallelScanvageGC.patch
+Patch98: 8046294-Generate-the-4-byte-timestamp-randomly.patch
+Patch100: 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
+Patch102: fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
+Patch103: Ddot-intrinsic-implement.patch
+Patch104: 8234003-Improve-IndexSet-iteration.patch
+Patch105: 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch

 #############################################
 #
@ -1428,10 +1434,17 @@ pushd %{top_level_dir_name}
 %patch90 -p1
 %patch91 -p1
 %patch92 -p1
-%patch93 -p1
 %patch94 -p1
 %patch95 -p1
 %patch96 -p1
+%patch97 -p1
+%patch98 -p1
+%patch100 -p1
+%patch102 -p1
+%patch103 -p1
+%patch104 -p1
+%patch105 -p1
+

 popd

@ -2051,6 +2064,19 @@ require "copy_jdk_configs.lua"
 %endif

 %changelog
+* Tue Sep 1 2020 jdkboy <guoge1@huawei.com> - 1:1.8.0.265-b10.2
+- Remove fast-serializer-jdk8.patch
+
+* Sat Aug 29 2020 jdkboy <guoge1@huawei.com> - 1:1.8.0.265-b10.1
+- Add leaf-optimize-in-ParallelScanvageGC.patch
+- Add 8046294-Generate-the-4-byte-timestamp-randomly.patch
+- Add 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
+- Add fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
+- Add Ddot-intrinsic-implement.patch
+- Add 8234003-Improve-IndexSet-iteration.patch
+- Add 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch
+- Remove prohibition-of-irreducible-loop-in-mergers.patch 
+
 * Tue Aug 25 2020 noah <hedongbo@huawei.com> - 1:1.8.0.265-b10.0
 - Update to aarch64-shenandoah-jdk8u-8u265-b01
 - add fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch
--- a/leaf-optimize-in-ParallelScanvageGC.patch
+++ b/leaf-optimize-in-ParallelScanvageGC.patch
@ -0,0 +1,210 @@
+diff --git a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
+index fba64e15f..1c92314f9 100644
+--- a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
+++ b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
+@@ -131,6 +131,14 @@ inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t*
+  return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+ }
+ 
+inline intptr_t Atomic::relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value)
+{
+  intptr_t value = compare_value;
+  __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */false,
+                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  return value;
+}
+
+ inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value)
+ {
+   return (void *) cmpxchg_ptr((intptr_t) exchange_value,
+diff --git a/hotspot/src/share/vm/classfile/classFileParser.cpp b/hotspot/src/share/vm/classfile/classFileParser.cpp
+index 07d07e4f2..f001a94e7 100644
+--- a/hotspot/src/share/vm/classfile/classFileParser.cpp
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp
+@@ -4393,6 +4393,11 @@ void ClassFileParser::fill_oop_maps(instanceKlassHandle k,
+   OopMapBlock* this_oop_map = k->start_of_nonstatic_oop_maps();
+   const InstanceKlass* const super = k->superklass();
+   const unsigned int super_count = super ? super->nonstatic_oop_map_count() : 0;
+
+  const bool super_is_gc_leaf = super ? super->oop_is_gc_leaf() : true;
+  bool this_is_gc_leaf = super_is_gc_leaf && (nonstatic_oop_map_count == 0);
+  k->set_oop_is_gc_leaf(this_is_gc_leaf);
+
+   if (super_count > 0) {
+     // Copy maps from superklass
+     OopMapBlock* super_oop_map = super->start_of_nonstatic_oop_maps();
+diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
+index b2de74d41..dde9ac426 100644
+--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
+@@ -49,7 +49,12 @@ inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) {
+       }
+       oopDesc::encode_store_heap_oop_not_null(p, o);
+     } else {
+-      push_depth(p);
+      // copy leaf objects in advance to reduce the cost of push and pop
+      if (!o->klass()->oop_is_gc_leaf()) {
+        push_depth(p);
+      } else {
+        PSScavenge::copy_and_push_safe_barrier<T, false>(this, p);
+      }
+     }
+   }
+ }
+@@ -171,7 +176,15 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) {
+     Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size);
+ 
+     // Now we have to CAS in the header.
+#ifdef AARCH64
+    // A CAS with a memory fence is costly within copy_to_survivor_space on aarch64.
+    // To reduce the cost, we forward the object with a relaxed CAS and issue a
+    // memory fence only when the CAS succeeds. To reduce fence insertion further,
+    // the fence is skipped for leaf objects (objects that have no reference fields).
+    if (o->relax_cas_forward_to(new_obj, test_mark)) {
+#else
+     if (o->cas_forward_to(new_obj, test_mark)) {
+#endif
+       // We won any races, we "own" this object.
+       assert(new_obj == o->forwardee(), "Sanity");
+ 
+@@ -195,10 +208,13 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) {
+         push_depth(masked_o);
+         TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
+       } else {
+-        // we'll just push its contents
+-        new_obj->push_contents(this);
+        // leaf objects have no contents, so push_contents is never needed
+        if (!o->klass()->oop_is_gc_leaf()) {
+          // we'll just push its contents
+          new_obj->push_contents(this);
+        }
+       }
+-    }  else {
+    } else {
+       // We lost, someone else "owns" this object
+       guarantee(o->is_forwarded(), "Object must be forwarded if the cas failed.");
+ 
+diff --git a/hotspot/src/share/vm/oops/klass.cpp b/hotspot/src/share/vm/oops/klass.cpp
+index 7fda7ce62..6e8f9acde 100644
+--- a/hotspot/src/share/vm/oops/klass.cpp
+++ b/hotspot/src/share/vm/oops/klass.cpp
+@@ -207,6 +207,8 @@ Klass::Klass() {
+   clear_modified_oops();
+   clear_accumulated_modified_oops();
+   _shared_class_path_index = -1;
+
+  set_oop_is_gc_leaf(false);
+ }
+ 
+ jint Klass::array_layout_helper(BasicType etype) {
+diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp
+index 22ae48f5c..4aea54795 100644
+--- a/hotspot/src/share/vm/oops/klass.hpp
+++ b/hotspot/src/share/vm/oops/klass.hpp
+@@ -177,6 +177,8 @@ class Klass : public Metadata {
+   jbyte _modified_oops;             // Card Table Equivalent (YC/CMS support)
+   jbyte _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
+ 
+  bool _is_gc_leaf;
+
+ private:
+   // This is an index into FileMapHeader::_classpath_entry_table[], to
+   // associate this class with the JAR file where it's loaded from during
+@@ -569,6 +571,9 @@ protected:
+                                                     oop_is_typeArray_slow()); }
+   #undef assert_same_query
+ 
+  void set_oop_is_gc_leaf(bool is_gc_leaf)        { _is_gc_leaf = is_gc_leaf; }
+  inline bool oop_is_gc_leaf()              const { return _is_gc_leaf; }
+
+   // Access flags
+   AccessFlags access_flags() const         { return _access_flags;  }
+   void set_access_flags(AccessFlags flags) { _access_flags = flags; }
+diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp
+index a703a54ef..41a7bce4d 100644
+--- a/hotspot/src/share/vm/oops/oop.hpp
+++ b/hotspot/src/share/vm/oops/oop.hpp
+@@ -76,6 +76,9 @@ class oopDesc {
+ 
+   void    release_set_mark(markOop m);
+   markOop cas_set_mark(markOop new_mark, markOop old_mark);
+#ifdef AARCH64
+  markOop relax_cas_set_mark(markOop new_mark, markOop old_mark);
+#endif
+ 
+   // Used only to re-initialize the mark word (e.g., of promoted
+   // objects during a GC) -- requires a valid klass pointer
+@@ -317,6 +320,10 @@ class oopDesc {
+   void forward_to(oop p);
+   bool cas_forward_to(oop p, markOop compare);
+ 
+#ifdef AARCH64
+  bool relax_cas_forward_to(oop p, markOop compare);
+#endif
+
+ #if INCLUDE_ALL_GCS
+   // Like "forward_to", but inserts the forwarding pointer atomically.
+   // Exactly one thread succeeds in inserting the forwarding pointer, and
+diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp
+index d4c4d75c0..c3abdb128 100644
+--- a/hotspot/src/share/vm/oops/oop.inline.hpp
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp
+@@ -76,6 +76,12 @@ inline markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) {
+   return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark);
+ }
+ 
+#ifdef AARCH64
+inline markOop oopDesc::relax_cas_set_mark(markOop new_mark, markOop old_mark) {
+  return (markOop)Atomic::relax_cmpxchg_ptr((intptr_t)new_mark, (volatile intptr_t*)&_mark, (intptr_t)old_mark);
+}
+#endif
+
+ inline Klass* oopDesc::klass() const {
+   if (UseCompressedClassPointers) {
+     return Klass::decode_klass_not_null(_metadata._compressed_klass);
+@@ -715,6 +721,30 @@ inline bool oopDesc::cas_forward_to(oop p, markOop compare) {
+   return cas_set_mark(m, compare) == compare;
+ }
+ 
+#ifdef AARCH64
+inline bool oopDesc::relax_cas_forward_to(oop p, markOop compare) {
+  assert(check_obj_alignment(p),
+         "forwarding to something not aligned");
+  assert(Universe::heap()->is_in_reserved(p),
+         "forwarding to something not in heap");
+  markOop m = markOopDesc::encode_pointer_as_mark(p);
+  assert(m->decode_pointer() == p, "encoding must be reversable");
+  markOop old_markoop = relax_cas_set_mark(m, compare);
+  // If the CAS succeeded, we must ensure the copy is visible to threads reading the forwardee.
+  // (We might delay the fence insertion until pushing contents to the task stack, as other
+  // threads only need to touch the copied object after stealing the task.)
+  if (old_markoop == compare) {
+    // Once the CAS succeeds, a leaf object never needs to be visible to other threads (the
+    // current thread has finished collecting it), so we can skip the fence.
+    if (!p->klass()->oop_is_gc_leaf()) {
+      OrderAccess::fence();
+    }
+    return true;
+  }
+  return false;
+}
+#endif
+
+ // Note that the forwardee is not the same thing as the displaced_mark.
+ // The forwardee is used when copying during scavenge and mark-sweep.
+ // It does need to clear the low two locking- and GC-related bits.
+diff --git a/hotspot/src/share/vm/runtime/atomic.hpp b/hotspot/src/share/vm/runtime/atomic.hpp
+index 9ca5fce97..015178b61 100644
+--- a/hotspot/src/share/vm/runtime/atomic.hpp
+++ b/hotspot/src/share/vm/runtime/atomic.hpp
+@@ -94,6 +94,10 @@ class Atomic : AllStatic {
+                                      unsigned int compare_value);
+ 
+   inline static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
+#ifdef AARCH64
+  inline static intptr_t relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
+#endif
+
+   inline static void*    cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value);
+ };
+ 
--- a/prohibition-of-irreducible-loop-in-mergers.patch
+++ b/prohibition-of-irreducible-loop-in-mergers.patch
@ -1,27 +0,0 @@
-From 34712f6bbc3c2c664ee641c78d4a2f8cfe427880 Mon Sep 17 00:00:00 2001
-Date: Fri, 28 Feb 2020 15:17:44 +0000
-Subject: [PATCH] prohibition of irreducible loop in mergers
-
-Summary: C2Compiler: irreducible loop should not enter merge_many_backedges
-LLT: NA
-Bug url: NA
---
- hotspot/src/share/vm/opto/loopnode.cpp | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp
-index e2c0645cf8..bbb2e2bf98 100644
--- a/hotspot/src/share/vm/opto/loopnode.cpp
-+++ b/hotspot/src/share/vm/opto/loopnode.cpp
-@@ -1542,7 +1542,7 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
-   // If I am a shared header (multiple backedges), peel off the many
-   // backedges into a private merge point and use the merge point as
-   // the one true backedge.
-  if( _head->req() > 3 ) {
-+  if( _head->req() > 3 && !_irreducible) {
-     // Merge the many backedges into a single backedge but leave
-     // the hottest backedge as separate edge for the following peel.
-     merge_many_backedges( phase );
-- 
-2.12.3
-