Traffic monitoring for Coherence 3.1
641599 · May 27 2008 — edited May 28 2008
The objective of our small project is to monitor the traffic on our Coherence clusters. We were also trying to store the cache traffic statistics as an object in the same named cache. The problem we encountered was that, during performance tests, something happened to the Coherence clusters: there appears to be some kind of lock that is not being released for other threads, which made the whole WebLogic cluster go down. WebLogic went down with "too many open files". We have thread dumps, which I can send if you need them; in the meantime I have attached the part that I suspect shows the cause.
Here is the code that does the monitoring. The servlet's doPut method performs the put; after the put it calls the method RegisterTraffic, which contains a small piece of logic to increment the count and put it back into the cache. It takes a lock on the particular "Traffic" key.
/**
* The Servlets doPut method - Handles the Cache Put Requests
* @param HttpServletRequest request, HttpServletResponse response
* @return void
* @throws CacheException
*/
public void doPut(HttpServletRequest request, HttpServletResponse response) throws
ServletException, IOException {
ServletOutputStream out = response.getOutputStream();
String value = "";
try {
String id = request.getPathInfo();
String expires = request.getHeader("Expires");
String contentType = request.getContentType();
String app_name = request.getHeader("App-Name");
int contentLength = request.getContentLength();
if (contentLength > 0) {
byte valueArray[] = new byte[contentLength];
ServletInputStream in = request.getInputStream();
int bytesRead = 0;
int offset = 0;
while (bytesRead > -1) {
bytesRead =
in.read(valueArray, offset, valueArray.length - offset);
offset += bytesRead;
if (offset == contentLength) {
break;
}
}
DataObject myValue = new DataObject();
myValue.setByte(valueArray);
myValue.setExpirationTime((Long.parseLong(expires))*1000);
Cache_Manager.put(id, myValue);
response.setContentType("application/octet-stream");
value = "ID "+id+" Stored";
out.write(value.getBytes());
out.flush();
RegisterTraffic(app_name,"PUT");
}
} catch (Exception ex) {
response.setContentType("application/octet-stream");
value = "CACHE_ERROR:"+ErrorCode.INTERNAL_PROBLEM_CODE+":"+"doPut:"+ErrorCode.INTERNAL_PROBLEM_MSG;
response.setContentLength(value.length());
out.write(value.getBytes());
throw new ServletException(value+"\n"+ex.getMessage());
}
}
/**
* The Servlets Traffic Monitor method - Handles the Traffic monitoring
* @param appname, get or put or clear
* @return void
* @throws CacheException
*/
public void RegisterTraffic(String appName, String action) {
String trafficKey = "Traffic";
try {
HashMap hmTotal = new HashMap();
HashMap hmToday = new HashMap();
Object obj = null;
HIDataObject dObj = null;
String today = (new java.util.Date().toString()).substring(0,3);
//String today = "SAT";
Long totalTrafficCount = new Long(1);
Long todayTrafficCount = new Long(1);
long totalCnt = 0;
long todayCnt = 0;
// Lock the Object.
Cache_Manager.lock(trafficKey,-1);
try{
dObj = (HIDataObject)Cache_Manager.get(trafficKey);
} catch(java.lang.NullPointerException nex) {
// If this Exception then we are doing it for the first time.
// Ignore this exception
} catch(Exception exe) {
CacheLog.error("CACHE_ERROR: RegisterTraffic Failed with Following Exception\n"+exe.getMessage());
}
if (dObj != null) {
hmTotal = dObj.getTotalTrafficHashMap();
hmToday = dObj.getTodayTrafficHashMap();
}
// HashMap.get will throw error for the first time , so initialize to 1.
try{
totalTrafficCount = (Long)hmTotal.get(appName+"-"+action);
} catch(java.lang.NullPointerException nex) {
CacheLog.error("CACHE_ERROR: RegisterTraffic Failed with Following Exception\n"+nex.getMessage());
}
try{
todayTrafficCount = (Long)hmToday.get(today+"-"+appName+"-"+action);
} catch(java.lang.NullPointerException nex) {
CacheLog.error("CACHE_ERROR: RegisterTraffic Failed with Following Exception\n"+nex.getMessage());
}
try{
totalCnt = totalTrafficCount.longValue();
todayCnt = todayTrafficCount.longValue();
} catch (Exception e) {
}
// Increase the counn here
totalCnt++;todayCnt++;
hmTotal.put(appName+"-"+action,new Long(totalCnt));
hmToday.put(today+"-"+appName+"-"+action,new Long(todayCnt));
try{
HIDataObject myValue = new HIDataObject();
myValue.setTotalTrafficHashMap(hmTotal);
myValue.setTodayTrafficHashMap(hmToday);
myValue.setExpirationTime(86400000);
Cache_Manager.put(trafficKey, myValue);
} catch (Exception exe){
CacheLog.error("CACHE_ERROR: RegisterTraffic Failed with Following Exception\n"+exe.getMessage());
}
} catch (Exception ex) {
CacheLog.error("CACHE_ERROR: RegisterTraffic Failed with Following Exception\n"+ex.getMessage());
} finally {
Cache_Manager.unlock(trafficKey);
}
}
Weblogic Thread Dumps
"TcpRingListener" id=76 idx=0x96 tid=19164 prio=6 alive, in native, daemon
at java/net/PlainSocketImpl.socketAccept(Ljava/net/SocketImpl;)V(Native Method)
at java/net/PlainSocketImpl.accept(Ljava/net/SocketImpl;)V(PlainSocketImpl.java:353)
^-- Holding lock: java/net/PlainSocketImpl@0xc5f4238[thin lock]
at java/net/ServerSocket.implAccept(Ljava/net/Socket;)V(ServerSocket.java:448)
at java/net/ServerSocket.accept()Ljava/net/Socket;(ServerSocket.java:419)
at com/tangosol/coherence/component/net/socket/TcpSocketAccepter.accept()Lcom/tangosol/coherence/component/net/socket/TcpSocket;(TcpSocketAccepter.CDB:17)
at com/tangosol/coherence/component/util/daemon/TcpRingListener.acceptConnection()V(TcpRingListener.CDB:9)
at com/tangosol/coherence/component/util/daemon/TcpRingListener.onNotify()V(TcpRingListener.CDB:1)
at com/tangosol/coherence/component/util/Daemon.run()V(Daemon.CDB:34)
at java/lang/Thread.run()V(Unknown Source)
at jrockit/vm/RNI.c2java(IIII)V(Native Method)
-- end of trace
"DistributedCache" id=78 idx=0x98 tid=19165 prio=5 alive, in native, waiting, daemon
-- Waiting for notification on: com/tangosol/coherence/component/util/daemon/QueueProcessor$Queue@0xc5c6998[fat lock]
at jrockit/vm/Threads.waitForSignal(J)Z(Native Method)
at java/lang/Object.wait(J)V(Native Method)[optimized]
at com/tangosol/coherence/component/util/Daemon.onWait()V(Daemon.CDB:9)[optimized]
^-- Lock released while waiting: com/tangosol/coherence/component/util/daemon/QueueProcessor$Queue@0xc5c6998[fat lock]
at com/tangosol/coherence/component/util/Daemon.run()V(Daemon.CDB:31)
at java/lang/Thread.run()V(Unknown Source)
at jrockit/vm/RNI.c2java(IIII)V(Native Method)
-- end of trace
"ListenThread.Default" id=79 idx=0x9a tid=19166 prio=5 alive, in native
at java/net/PlainSocketImpl.socketAccept(Ljava/net/SocketImpl;)V(Native Method)
at java/net/PlainSocketImpl.accept(Ljava/net/SocketImpl;)V(PlainSocketImpl.java:353)
^-- Holding lock: java/net/PlainSocketImpl@0x1729efc8[thin lock]
at java/net/ServerSocket.implAccept(Ljava/net/Socket;)V(ServerSocket.java:448)
at java/net/ServerSocket.accept()Ljava/net/Socket;(ServerSocket.java:419)
at weblogic/socket/WeblogicServerSocket.accept()Ljava/net/Socket;(WeblogicServerSocket.java:26)
at weblogic/t3/srvr/ListenThread.accept()Ljava/net/Socket;(ListenThread.java:735)
at weblogic/t3/srvr/ListenThread.run()V(ListenThread.java:301)
at jrockit/vm/RNI.c2java(IIII)V(Native Method)
-- end of trace
Blocked lock chains
===================
Chain 2:
"ExecuteThread: '2' for queue: 'weblogic.socket.Muxer'" id=53 idx=0x70 tid=18903 waiting for java/lang/String@0x102fb4d8 held by:
"ExecuteThread: '1' for queue: 'weblogic.socket.Muxer'" id=52 idx=0x6e tid=18902 in chain 1
Coherence Thread Dumps
"PacketPublisher" id=21 idx=0x32 tid=20248 prio=6 alive, in native, waiting, daemon
at jrockit/vm/Threads.waitForSignal(J)Z(Native Method)
at java/lang/Object.wait(J)V(Native Method)
at com/tangosol/coherence/component/util/Daemon.onWait()V(Daemon.CDB:9)
^-- Lock released while waiting: com/tangosol/coherence/component/net/Cluster$PacketPublisher$Queue@0xcb36648[fat lock]
at com/tangosol/coherence/component/util/Daemon.run()V(Daemon.CDB:31)
at java/lang/Thread.run()V(Unknown Source)
at jrockit/vm/RNI.c2java(IIII)V(Native Method)
-- end of trace
"Cluster" id=22 idx=0x34 tid=20249 prio=5 alive, in native, waiting, daemon
-- Waiting for notification on: com/tangosol/coherence/component/net/Cluster$ClusterService$Queue@0xcb30190[fat lock]
at jrockit/vm/Threads.waitForSignal(J)Z(Native Method)
at java/lang/Object.wait(J)V(Native Method)
at com/tangosol/coherence/component/util/Daemon.onWait()V(Daemon.CDB:9)
^-- Lock released while waiting: com/tangosol/coherence/component/net/Cluster$ClusterService$Queue@0xcb30190[fat lock]
at com/tangosol/coherence/component/util/Daemon.run()V(Daemon.CDB:31)
at java/lang/Thread.run()V(Unknown Source)
at jrockit/vm/RNI.c2java(IIII)V(Native Method)
-- end of trace
"PO Async Executor" id=27 idx=0x36 tid=20436 prio=5 alive, in native, waiting, daemon
-- Waiting for notification on: java/lang/Object@0xa7573d8[fat lock]
at jrockit/vm/Threads.waitForSignal(J)Z(Native Method)
at jrockit/vm/Locks.wait(Ljava/lang/Object;J)V(Unknown Source)
at java/lang/Object.wait()V(Native Method)
at com/wily/EDU/oswego/cs/dl/util/concurrent/BoundedLinkedQueue.take()Ljava/lang/Object;(BoundedLinkedQueue.java:225)
^-- Lock released while waiting: java/lang/Object@0xa7573d8[fat lock]
at com/wily/EDU/oswego/cs/dl/util/concurrent/QueuedExecutor$RunLoop.run()V(QueuedExecutor.java:82)
at java/lang/Thread.run()V(Unknown Source)
at jrockit/vm/RNI.c2java(IIII)V(Native Method)
-- end of trace
"TcpRingListener" id=24 idx=0x38 tid=20252 prio=6 alive, in native, daemon
at java/net/PlainSocketImpl.socketAccept(Ljava/net/SocketImpl;)V(Native Method)
at java/net/PlainSocketImpl.accept(Ljava/net/SocketImpl;)V(PlainSocketImpl.java:353)
^-- Holding lock: java/net/PlainSocketImpl@0xd441530[thin lock]
at java/net/ServerSocket.implAccept(Ljava/net/Socket;)V(ServerSocket.java:448)
at java/net/ServerSocket.accept()Ljava/net/Socket;(ServerSocket.java:419)
at com/tangosol/coherence/component/net/socket/TcpSocketAccepter.accept()Lcom/tangosol/coherence/component/net/socket/TcpSocket;(TcpSocketAccepter.CDB:17)
at com/tangosol/coherence/component/util/daemon/TcpRingListener.acceptConnection()V(TcpRingListener.CDB:9)
at com/tangosol/coherence/component/util/daemon/TcpRingListener.onNotify()V(TcpRingListener.CDB:1)
at com/tangosol/coherence/component/util/Daemon.run()V(Daemon.CDB:34)
at java/lang/Thread.run()V(Unknown Source)
at jrockit/vm/RNI.c2java(IIII)V(Native Method)