CrateDB 5.4: Write thread pool getting stuck after upgrade from 5.3.2

After upgrading from CrateDB 5.3.2 to 5.4.2 with CrateDB JDBC 2.6.0 jar, all write threads are getting blocked with single monitor. Below is the thread dump url for reference.

Thread dump report: Java thread dump analysis report
Link to download :Download Threaddump

cratedb[crate03][write][T#25]" #282 [3360260] daemon prio=5 os_prio=0 cpu=99514.17ms elapsed=149022.00s tid=0x00007f313c017fb0 nid=3360260 waiting on condition  [0x00007ea4087d7000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)
--
	at java.lang.Thread.run(java.base@20.0.2/Thread.java:1623)

"cratedb[crate03][write][T#26]" #283 [3360261] daemon prio=5 os_prio=0 cpu=99261.10ms elapsed=149022.00s tid=0x00007f3120007dd0 nid=3360261 waiting on condition  [0x00007ea4086d6000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)
--
	at java.lang.Thread.run(java.base@20.0.2/Thread.java:1623)

"cratedb[crate03][write][T#27]" #284 [3360262] daemon prio=5 os_prio=0 cpu=98810.41ms elapsed=149022.00s tid=0x00007f314400d960 nid=3360262 waiting on condition  [0x00007ea4085d5000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)
--
	at java.lang.Thread.run(java.base@20.0.2/Thread.java:1623)

"cratedb[crate03][write][T#28]" #285 [3360263] daemon prio=5 os_prio=0 cpu=99012.50ms elapsed=149022.00s tid=0x00007f315004efb0 nid=3360263 waiting on condition  [0x00007ea4084d4000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)
--
	at java.lang.Thread.run(java.base@20.0.2/Thread.java:1623)

"cratedb[crate03][write][T#29]" #286 [3360264] daemon prio=5 os_prio=0 cpu=99243.00ms elapsed=149021.99s tid=0x00007f3148021310 nid=3360264 waiting on condition  [0x00007ea4083d3000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)
--
	at java.lang.Thread.run(java.base@20.0.2/Thread.java:1623)

"cratedb[crate03][write][T#30]" #287 [3360265] daemon prio=5 os_prio=0 cpu=99556.14ms elapsed=149021.99s tid=0x00007f31500712f0 nid=3360265 waiting on condition  [0x00007ea4082d2000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)
--
	at java.lang.Thread.run(java.base@20.0.2/Thread.java:1623)

"cratedb[crate03][write][T#31]" #288 [3360266] daemon prio=5 os_prio=0 cpu=99505.87ms elapsed=149021.99s tid=0x00007f3150072c70 nid=3360266 waiting on condition  [0x00007ea4081d1000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)
--
	at java.lang.Thread.run(java.base@20.0.2/Thread.java:1623)

"cratedb[crate03][write][T#32]" #289 [3360267] daemon prio=5 os_prio=0 cpu=99367.26ms elapsed=149021.99s tid=0x00007f31480222c0 nid=3360267 waiting on condition  [0x00007ea3dbffe000]
   java.lang.Thread.State: WAITING (parking)
	at jdk.internal.misc.Unsafe.park(java.base@20.0.2/Native Method)
	- parking to wait for  <0x00007f3304bdd638> (a java.util.concurrent.LinkedTransferQueue)
	at java.util.concurrent.locks.LockSupport.park(java.base@20.0.2/LockSupport.java:371)
	at java.util.concurrent.LinkedTransferQueue$Node.block(java.base@20.0.2/LinkedTransferQueue.java:470)
	at java.util.concurrent.ForkJoinPool.unmanagedBlock(java.base@20.0.2/ForkJoinPool.java:3780)
	at java.util.concurrent.ForkJoinPool.managedBlock(java.base@20.0.2/ForkJoinPool.java:3725)
	at java.util.concurrent.LinkedTransferQueue.awaitMatch(java.base@20.0.2/LinkedTransferQueue.java:669)

Hi @Satishlk ,
Could you elaborate a bit on your setup and the issue you are observing at a higher level? are you running INSERTs or UPDATEs? are there any exceptions logged in the CrateDB log on the nodes?

1 Like

Hi @hernanc ,

We are using 3 nodes cluster with 32 CPU and 48 GB ram.We are using Crate as secondary database for searches of txn data .We have pipeline job which sync data from mysql to kafka to crate database . We have exposed api’s to aggregate data to show in client application.Insertions are generally at peak of 100-1000 per second. Search request volume will be 1.5k-2.0k request per minute.We are not seeing any exception in logs,but search threads getting clogged as soon we are routing request to crate instance .We have older instance 3.0.6 of crate which is able to manage the same workload with higher response time.We had created new instance of 5.3.2 and then migrated to 5.4.2. We have seen behavior with search,write ,get threads waiting on the same resource when we checked thread dump. We are using java spring boot to connect to crate database with crate jdbc driver 2.6.0 .