在监控设备的时候,在server端的日志中有时候会见到类似another network error, wait for 15s seconds的异常,今天我们看下这个问题的出现原因和解决方案:
问题定位到poller.c,看下下面两份代码:
这个get_values的部分代码:
for (i = 0; i < num; i++) { switch (errcodes[i]) { case SUCCEED: case NOTSUPPORTED: case AGENT_ERROR: if (HOST_AVAILABLE_TRUE != last_available) { zbx_activate_item_host(&items[i], ×pec); last_available = HOST_AVAILABLE_TRUE; } break; case NETWORK_ERROR: case GATEWAY_ERROR: case TIMEOUT_ERROR: if (HOST_AVAILABLE_FALSE != last_available) { zbx_deactivate_item_host(&items[i], ×pec, results[i].msg); last_available = HOST_AVAILABLE_FALSE; } break; case CONFIG_ERROR: /* nothing to do */ break; default: zbx_error("unknown response code returned: %d", errcodes[i]); THIS_SHOULD_NEVER_HAPPEN; }