公司的服务器后台部署在某一个地方,接入的是用户的APP,而该地方的网络信号较差,导致了服务器后台在运行一段时间后用户无法接入,那边的同事反馈使用netstat查看系统,存在较多的TCP连接。
1. 问题分析
首先在公司内部测试服务器上部署,使用LoadRunner做压力测试,能正常运行;随后那边的同事反馈该地方信号较差。考虑到问题出在接入环节,有可能是接入进程的FD资源耗尽,导致accept失败。推论的依据是:对于TCP连接来说,如果客户端由于一些异常情况断网,而未能向服务器发出FIN关闭报文,服务端若没有设置存活检测(keepalive),该连接就会一直残留在服务端并持续占用FD(具体存活时间暂未测)。
2. 实验测试
这里简单地写了一个服务端程序,主要功能是回显(echo),即接收一个报文(格式:2Byte报文长度+报文内容),然后原封不动地将报文内容发回客户端。
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <pthread.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
/* Epoll descriptor shared across the file: main() stores the result of
 * epoll_create() here, and ReadFromClient() passes it to ResetOneShot(). */
int g_epfd;
/*
 * Create a TCP socket, bind it to every local IPv4 address on `port`
 * and put it into the listening state (backlog 128).
 *
 * port: TCP port in host byte order.
 *
 * Returns the listening descriptor on success, or -1 when socket() itself
 * fails (errno is left as set by socket(), so the caller can report it).
 * A bind() or listen() failure prints a message, releases the descriptor
 * and terminates the process with exit(-1).
 */
int InitServer( unsigned short port )
{
    int nServerFd = socket( AF_INET, SOCK_STREAM, 0 );
    if ( nServerFd < 0 )
    {
        /* Without this check the failure would surface later as a
         * misleading "bind error" on descriptor -1. */
        return -1;
    }

    struct sockaddr_in addr;
    memset( &addr, 0, sizeof(addr) );
    addr.sin_family = AF_INET;
    addr.sin_port = htons( port );
    addr.sin_addr.s_addr = htonl( INADDR_ANY );

    if ( bind( nServerFd, (struct sockaddr *)&addr, sizeof(addr) ) < 0 )
    {
        printf("bind error\n");
        close( nServerFd );
        exit(-1);
    }
    if ( listen( nServerFd, 128 ) < 0 )
    {
        printf("listen error\n");
        close( nServerFd );
        exit(-1);
    }
    return nServerFd;
}
/*
 * Register nFd with the epoll instance epfd.
 *
 * The descriptor is watched for EPOLLIN and EPOLLRDHUP in edge-triggered
 * mode (EPOLLET); a non-zero nOneShot additionally sets EPOLLONESHOT.
 * The descriptor itself is stored in the event's data.fd field.
 *
 * Returns the result of epoll_ctl(EPOLL_CTL_ADD): 0 on success, -1 on error.
 */
int AddFd( int epfd, int nFd , int nOneShot)
{
    const unsigned int oneShotBit = nOneShot ? EPOLLONESHOT : 0;
    struct epoll_event registration;

    memset( &registration, 0, sizeof registration );
    registration.events = EPOLLIN | EPOLLRDHUP | EPOLLET | oneShotBit;
    registration.data.fd = nFd;

    return epoll_ctl( epfd, EPOLL_CTL_ADD, nFd, &registration );
}
/*
 * Re-arm a descriptor already registered with the epoll instance epfd.
 *
 * The event mask is replaced with EPOLLIN | EPOLLRDHUP | EPOLLONESHOT
 * (EPOLLET is not part of this mask) and data.fd is set to nFd.
 *
 * Returns the result of epoll_ctl(EPOLL_CTL_MOD): 0 on success, -1 on error.
 */
int ResetOneShot( int epfd, int nFd )
{
    struct epoll_event rearm;

    memset( &rearm, 0, sizeof rearm );
    rearm.events = EPOLLIN | EPOLLRDHUP | EPOLLONESHOT;
    rearm.data.fd = nFd;

    return epoll_ctl( epfd, EPOLL_CTL_MOD, nFd, &rearm );
}
/* Largest payload that the 2-byte length prefix can announce. */
enum { ECHO_MAX_PAYLOAD = 0xFFFF };

/*
 * Read exactly nWant bytes from nFd into pBuf, retrying after short reads
 * and EINTR. Returns 0 on success, -1 on a read error or when the peer
 * closes the connection before nWant bytes arrived.
 */
static int EchoReadExact( int nFd, unsigned char *pBuf, size_t nWant )
{
    size_t nGot = 0;
    while ( nGot < nWant )
    {
        ssize_t n = read( nFd, pBuf + nGot, nWant - nGot );
        if ( n < 0 && errno == EINTR )
        {
            continue;
        }
        if ( n <= 0 )
        {
            /* n == 0 is end-of-stream; treating it as failure avoids
             * spinning forever on a closed connection. */
            return -1;
        }
        nGot += (size_t)n;
    }
    return 0;
}

/*
 * Write all nLen bytes of pBuf to nFd, retrying after partial writes and
 * EINTR. Returns nLen on success, -1 on a write error.
 */
static int EchoWriteAll( int nFd, const unsigned char *pBuf, size_t nLen )
{
    size_t nSent = 0;
    while ( nSent < nLen )
    {
        ssize_t n = write( nFd, pBuf + nSent, nLen - nSent );
        if ( n < 0 )
        {
            if ( errno == EINTR )
            {
                continue;
            }
            return -1;
        }
        nSent += (size_t)n;
    }
    return (int)nSent;
}

/*
 * Thread routine: read one length-prefixed message from a client and echo
 * its payload back.
 *
 * arg: the client descriptor, carried in the pointer value itself.
 *
 * Wire format: 2-byte big-endian payload length followed by the payload.
 * After echoing, the descriptor is re-armed via ResetOneShot(g_epfd, fd).
 * If the header or payload cannot be read, a message is printed and the
 * descriptor is closed.
 * NOTE(review): closing here assumes this thread is the only owner of the
 * descriptor once it has been dispatched - confirm against the dispatcher.
 * NOTE(review): EAGAIN on a non-blocking descriptor is treated as a read
 * error, as it was before - confirm whether client sockets are blocking.
 *
 * Always returns NULL.
 */
void * ReadFromClient( void * arg )
{
    /* Go through intptr_t so the pointer-to-int narrowing is well defined. */
    int nClientFd = (int)(intptr_t)arg;
    unsigned char header[2];
    /* Sized for the largest announceable payload, so no length can overflow it. */
    unsigned char buf[ECHO_MAX_PAYLOAD];
    uint16_t nHostOrderLen;
    int nDataLen;

    printf("ReadFromClient Enter\n");
    if ( EchoReadExact( nClientFd, header, sizeof header ) != 0 )
    {
        printf("Read Data Len error\n");
        close( nClientFd );
        return NULL;
    }

    /* Diagnostic only: the prefix as the host would read it natively.
     * memcpy avoids the aliasing/alignment problems of a pointer cast. */
    memcpy( &nHostOrderLen, header, sizeof nHostOrderLen );
    printf("nDataLen [%d]\n", nHostOrderLen);
    /* The value actually used: big-endian (network order) decoding. */
    nDataLen = header[0]*256 + header[1];
    printf("nDataLen [%d]\n", nDataLen);

    if ( EchoReadExact( nClientFd, buf, (size_t)nDataLen ) != 0 )
    {
        printf("Read Data error\n");
        close( nClientFd );
        return NULL;
    }
    printf("nTotal [%d]\n", nDataLen);

    /* Fixed pause between receiving the request and sending the reply. */
    sleep(5);

    int nWrite = EchoWriteAll( nClientFd, buf, (size_t)nDataLen );
    printf("nWrite[%d]\n", nWrite);
    printf("Not Write ResetOneShot [%d]\n", ResetOneShot(g_epfd, nClientFd));
    return NULL;
}
int main(int argc, char const *argv[])
{
int i;
int nClientFd;
pthread_t tid;
struct epoll_event events[1024];
int nServerFd = InitServer( 7777 );
if ( nServerFd < 0 )
{
perror( "nServerFd" );
exit(-1);
}
int epfd = epoll_create( 1024 );
g_epfd = epfd;
int nReadyNums;
if ( AddFd( epfd, nServerFd, 0 ) < 0 )
{
printf("AddFd error\n");
exit(-1);
}
while( 1 )
{
nReadyNums = epoll_wait( epfd, events, 1024, -1 );