References

networking.c
After a request has been processed, the response is produced by calling the addReply family of functions.

void addReply(redisClient *c, robj *obj);
void addReplySds(redisClient *c, sds s);
void addReplyString(redisClient *c, char *s, size_t len);
void addReplyString(redisClient *c, char *s, size_t len) {
    if (prepareClientToWrite(c) != REDIS_OK) return;
    if (_addReplyToBuffer(c,s,len) != REDIS_OK)
        _addReplyStringToList(c,s,len);
}
Taking addReplyString as an example, the addReply functions work as follows:
First, prepareClientToWrite is called to perform the preparation work.
Then the response content is written to the client's output cache; the fixed-size buf is always tried first, and only if that fails is the data appended to the dynamically allocated list.
The client output cache consists of two parts: a fixed-size buf and a dynamically allocated reply list.

#define REDIS_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
char buf[REDIS_REPLY_CHUNK_BYTES];

c->reply = listCreate();

1. Preparation before writing the response

/* This function is called every time we are going to transmit new data
 * to the client. The behavior is the following:
 *
 * If the client should receive new data (normal clients will) the function
 * returns REDIS_OK, and make sure to install the write handler in our event
 * loop so that when the socket is writable new data gets written.
 *
 * If the client should not receive new data, because it is a fake client,
 * a master, a slave not yet online, or because the setup of the write handler
 * failed, the function returns REDIS_ERR.
 *
 * Typically gets called every time a reply is built, before adding more
 * data to the clients output buffers. If the function returns REDIS_ERR no
 * data should be appended to the output buffers. */
int prepareClientToWrite(redisClient *c) {
    if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK;
    if ((c->flags & REDIS_MASTER) &&
        !(c->flags & REDIS_MASTER_FORCE_REPLY)) return REDIS_ERR;
    if (c->fd <= 0) return REDIS_ERR; /* Fake client */
    if (c->bufpos == 0 && listLength(c->reply) == 0 &&
        (c->replstate == REDIS_REPL_NONE ||
         c->replstate == REDIS_REPL_ONLINE) &&
        aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
        sendReplyToClient, c) == AE_ERR) return REDIS_ERR;
    return REDIS_OK;
}
Before any response content is written to the client's output cache, a write event is registered with the event loop; the callback is sendReplyToClient().
The write event is only installed while the output cache is still empty: c->bufpos == 0 && listLength(c->reply) == 0.

2. Writing the response

The response content is written by calling one of the following functions:

int _addReplyToBuffer(redisClient *c, char *s, size_t len);
void _addReplyObjectToList(redisClient *c, robj *o);
void _addReplySdsToList(redisClient *c, sds s);
void _addReplyStringToList(redisClient *c, char *s, size_t len);

Note: when writing the response, the fixed-size buf is always tried first; only if that fails is the data appended to the dynamically allocated list.
int _addReplyToBuffer(redisClient *c, char *s, size_t len) {
    size_t available = sizeof(c->buf)-c->bufpos;

    if (c->flags & REDIS_CLOSE_AFTER_REPLY) return REDIS_OK;

    /* If there already are entries in the reply list, we cannot
     * add anything more to the static buffer. */
    if (listLength(c->reply) > 0) return REDIS_ERR;

    /* Check that the buffer has enough space available for this string. */
    if (len > available) return REDIS_ERR;

    memcpy(c->buf+c->bufpos,s,len);
    c->bufpos+=len;
    return REDIS_OK;
}
void _addReplyStringToList(redisClient *c, char *s, size_t len) {
    robj *tail;

    if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;

    if (listLength(c->reply) == 0) {
        robj *o = createStringObject(s,len);

        listAddNodeTail(c->reply,o);
        c->reply_bytes += zmalloc_size_sds(o->ptr);
    } else {
        tail = listNodeValue(listLast(c->reply));

        /* Append to this object when possible. */
        if (tail->ptr != NULL &&
            sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES)
        {
            c->reply_bytes -= zmalloc_size_sds(tail->ptr);
            tail = dupLastObjectIfNeeded(c->reply);
            tail->ptr = sdscatlen(tail->ptr,s,len);
            c->reply_bytes += zmalloc_size_sds(tail->ptr);
        } else {
            robj *o = createStringObject(s,len);

            listAddNodeTail(c->reply,o);
            c->reply_bytes += zmalloc_size_sds(o->ptr);
        }
    }
    asyncCloseClientOnOutputBufferLimitReached(c);
}

3. Sending the response

The response is sent by the sendReplyToClient function; its core loop looks like this:
while(c->bufpos > 0 || listLength(c->reply)) {
    if (c->bufpos > 0) {
        nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen);
        if (nwritten <= 0) break;
        c->sentlen += nwritten;
        totwritten += nwritten;

        /* If the buffer was sent, set bufpos to zero to continue with
         * the remainder of the reply. */
        if (c->sentlen == c->bufpos) {
            c->bufpos = 0;
            c->sentlen = 0;
        }
    } else {
        o = listNodeValue(listFirst(c->reply));
        objlen = sdslen(o->ptr);
        objmem = zmalloc_size_sds(o->ptr);

        if (objlen == 0) {
            listDelNode(c->reply,listFirst(c->reply));
            continue;
        }

        nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen);
        if (nwritten <= 0) break;
        c->sentlen += nwritten;
        totwritten += nwritten;

        /* If we fully sent the object on head go to the next one */
        if (c->sentlen == objlen) {
            listDelNode(c->reply,listFirst(c->reply));
            c->sentlen = 0;
            c->reply_bytes -= objmem;
        }
    }
    /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
     * bytes, in a single threaded server it's a good idea to serve
     * other clients as well, even if a very large request comes from
     * super fast link that is always able to accept data (in real world
     * scenario think about 'KEYS *' against the loopback interface).
     *
     * However if we are over the maxmemory limit we ignore that and
     * just deliver as much data as it is possible to deliver. */
    server.stat_net_output_bytes += totwritten;
    if (totwritten > REDIS_MAX_WRITE_PER_EVENT &&
        (server.maxmemory == 0 ||
         zmalloc_used_memory() < server.maxmemory)) break;
}

References

redis.c
networking.c

1. Establishing a connection

/* Create an event handler for accepting new connections in TCP and Unix
 * domain sockets. */
for (j = 0; j < server.ipfd_count; j++) {
    if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
        acceptTcpHandler,NULL) == AE_ERR)
        {
            redisPanic(
                "Unrecoverable error creating server.ipfd file event.");
        }
}
if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
    acceptUnixHandler,NULL) == AE_ERR) redisPanic("Unrecoverable error creating server.sofd file event.");
When a client requests a connection, the corresponding event handlers are acceptTcpHandler and acceptUnixHandler:
acceptTcpHandler accepts TCP connection requests;
acceptUnixHandler accepts Unix domain socket connection requests.
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
    int cport, cfd, max = MAX_ACCEPTS_PER_CALL;
    char cip[REDIS_IP_STR_LEN];
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);
    REDIS_NOTUSED(privdata);

    while(max--) {
        cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
        if (cfd == ANET_ERR) {
            if (errno != EWOULDBLOCK)
                redisLog(REDIS_WARNING,
                    "Accepting client connection: %s", server.neterr);
            return;
        }
        redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
        acceptCommonHandler(cfd,0);
    }
}
anetTcpAccept accepts the TCP connection and returns the connected socket cfd.
acceptCommonHandler initializes the connected socket cfd and the corresponding client structure.
c = createClient(fd)

Most of the initialization happens in createClient; in particular, the event handler readQueryFromClient is registered to read requests:

aeCreateFileEvent(server.el,fd,AE_READABLE,readQueryFromClient, c)

2. Request processing

Request processing can be divided into three steps:
1. Read the request
2. Parse the request
3. Execute the request
readQueryFromClient is the entry point of request processing: it reads the request into c->querybuf and then calls processInputBuffer.
void processInputBuffer(redisClient *c) {
    /* Keep processing while there is something in the input buffer */
    while(sdslen(c->querybuf)) {
        /* Immediately abort if the client is in the middle of something. */
        if (c->flags & REDIS_BLOCKED) return;

        /* REDIS_CLOSE_AFTER_REPLY closes the connection once the reply is
         * written to the client. Make sure to not let the reply grow after
         * this flag has been set (i.e. don't process more commands). */
        if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;

        /* Determine request type when unknown. */
        if (!c->reqtype) {
            if (c->querybuf[0] == '*') {
                c->reqtype = REDIS_REQ_MULTIBULK;
            } else {
                c->reqtype = REDIS_REQ_INLINE;
            }
        }

        if (c->reqtype == REDIS_REQ_INLINE) {
            if (processInlineBuffer(c) != REDIS_OK) break;
        } else if (c->reqtype == REDIS_REQ_MULTIBULK) {
            if (processMultibulkBuffer(c) != REDIS_OK) break;
        } else {
            redisPanic("Unknown request type");
        }

        /* Multibulk processing could see a <= 0 length. */
        if (c->argc == 0) {
            resetClient(c);
        } else {
            /* Only reset the client when the command was executed. */
            if (processCommand(c) == REDIS_OK)
                resetClient(c);
        }
    }
}
if (c->reqtype == REDIS_REQ_INLINE) {
    if (processInlineBuffer(c) != REDIS_OK) break;
} else if (c->reqtype == REDIS_REQ_MULTIBULK) {
    if (processMultibulkBuffer(c) != REDIS_OK) break;
}

Depending on the request type, processInlineBuffer or processMultibulkBuffer is called to parse the request.
Finally, processCommand is called to execute the request.

c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
Using argv[0] as the key, the command table is searched to find the corresponding command implementation.

call(c,REDIS_CALL_FULL);
call() then invokes the command's implementation function.
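To make the dispatch step concrete, here is a rough sketch in C of the idea behind a command-table lookup. It is illustrative only and is not Redis's actual implementation (Redis stores its commands in a dict keyed by command name); the table contents and handler signatures here are invented for the example.

#include <stdio.h>
#include <strings.h>

/* Hypothetical command-table entry: a name plus a handler, loosely
 * analogous to Redis's redisCommand struct. */
typedef struct command {
    const char *name;
    void (*proc)(void);
} command;

static void getCommand(void) { printf("GET handler\n"); }
static void setCommand(void) { printf("SET handler\n"); }

static command commandTable[] = {
    {"get", getCommand},
    {"set", setCommand},
};

/* A linear search stands in for the hash-table lookup. */
static command *lookupCommand(const char *name) {
    for (size_t i = 0; i < sizeof(commandTable)/sizeof(commandTable[0]); i++)
        if (strcasecmp(name, commandTable[i].name) == 0)
            return &commandTable[i];
    return NULL;
}

int main(void) {
    command *cmd = lookupCommand("GET");   /* plays the role of argv[0] */
    if (cmd) cmd->proc();                  /* plays the role of call() */
    return 0;
}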

References

https://en.wikipedia.org/wiki/Normalization_(statistics)
https://en.wikipedia.org/wiki/Standard_score
https://en.wikipedia.org/wiki/Feature_scaling
https://en.wikipedia.org/wiki/Coefficient_of_variation

https://www.cnblogs.com/feiyumo/p/9866818.html
https://blog.csdn.net/binlin199012/article/details/84638649
Normalization
Z-score
Feature scaling
Coefficient of variation
In statistics and applications of statistics, normalization can have a range of meanings.
In the simplest cases, normalization of ratings means adjusting values measured on different scales to a notionally common scale, often prior to averaging.
In more complicated cases, normalization may refer to more sophisticated adjustments where the intention is to bring the entire probability distributions of adjusted values into alignment.

Normalization is a way of simplifying computation: a quantity with units is transformed, by a change of variables, into a dimensionless quantity, i.e. a pure number.

Z-score

Meaning: how many standard deviations away from the mean is a given value?

Formula: z = (x - μ) / σ, where μ is the mean and σ is the standard deviation.

Feature scaling

Meaning: rescale the values of different features into a common range.

Formula (min-max scaling): x' = (x - min(x)) / (max(x) - min(x))

Coefficient of variation

Meaning: a normalized measure of dispersion, used to compare how spread out two data sets are.

Formula: CV = σ / μ
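To make the three definitions above concrete, here is a small C example (illustrative only; it uses a made-up sample and the population standard deviation) that prints a z-score, a min-max scaled value, and the coefficient of variation. Compile with -lm.

#include <stdio.h>
#include <math.h>

int main(void)
{
    double x[] = {2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0};
    int n = sizeof(x) / sizeof(x[0]);
    double sum = 0.0, var = 0.0, min = x[0], max = x[0];

    for (int i = 0; i < n; i++) {
        sum += x[i];
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
    }
    double mean = sum / n;
    for (int i = 0; i < n; i++) var += (x[i] - mean) * (x[i] - mean);
    double sd = sqrt(var / n);   /* population standard deviation: here 2.0, mean 5.0 */

    printf("z-score of x[0]          : %f\n", (x[0] - mean) / sd);         /* (2-5)/2 = -1.5 */
    printf("min-max scaled x[0]      : %f\n", (x[0] - min) / (max - min)); /* (2-2)/(9-2) = 0 */
    printf("coefficient of variation : %f\n", sd / mean);                  /* 2/5 = 0.4 */
    return 0;
}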

References

https://en.wikipedia.org/wiki/Position-independent_code
https://www.cnblogs.com/Xiao_bird/archive/2010/03/01/1675821.html
https://www.jianshu.com/p/2e12c6180d13

Computer Systems: A Programmer's Perspective (CSAPP), the chapter on linking
Static libraries solve many of the problems of making large collections of related functions available to application programs. However, they still have some significant drawbacks.
Like all software, static libraries need periodic maintenance and updates. If application programmers want to use the latest version of a library, they must somehow learn that the library has been updated and then explicitly relink their programs against the updated library.
Another problem is that almost every C program uses standard I/O functions such as printf and scanf. At run time, the code for these functions is duplicated in the text segment of every running process. On a typical system running hundreds of processes, this is a significant waste of scarce memory. (An interesting property of memory is that it is always a scarce resource, no matter how much a system has.)
Shared libraries are a modern innovation aimed at fixing the shortcomings of static libraries. A shared library is an object module that, at run time or at load time, can be loaded at an arbitrary memory address and linked with a program in memory.
On Linux systems, shared libraries are identified by the .so suffix.

Position-Independent Code

A major purpose of shared libraries is to allow multiple running processes to share the same library code in memory, thereby saving precious memory resources. So how can multiple processes share a single copy of a program?
Code that can be loaded without relocation is called position-independent code (PIC). Users direct the GNU compilation system to generate PIC with the -fpic option to GCC; shared libraries must always be compiled with this option.

ldd

ldd prints the shared libraries required by each program.
# ldd nginx
Lists the shared libraries that nginx depends on.

ldconfig

# -p
Print the lists of directories and candidate libraries stored in the current cache.
Lists the shared libraries stored in the current cache file.
# /etc/ld.so.cache
File containing an ordered list of libraries found in the directories specified in /etc/ld.so.conf, as well as those found in /lib and /usr/lib.

hello.h

#ifndef HELLO_H
#define HELLO_H

void hello(const char *name);

#endif

hello.c

#include <stdio.h>
void hello(const char *name)
{
    printf("hello,%s!\n",name);
}

main.c

#include "hello.h"
int main()
{
    hello("world");
    return 0;
}

Build and run

# gcc hello.c -fPIC -shared -o libhello.so
# gcc main.c libhello.so -o hello
# mv libhello.so /usr/lib    (or /usr/lib64 on 64-bit systems)
# ./hello

Explicit loading (dlopen)

#include <dlfcn.h>

void *dlopen(const char *filename, int flag);
void *dlsym(void *handle, const char *symbol);
int dlclose(void *handle);
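Building on the libhello.so example above, a program can also load the library explicitly at run time instead of linking against it. A minimal sketch (the path "./libhello.so" and the file name main_dl.c are assumptions for this example; link with -ldl on Linux):

#include <stdio.h>
#include <dlfcn.h>

int main(void)
{
    /* Open the shared library built above. */
    void *handle = dlopen("./libhello.so", RTLD_LAZY);
    if (handle == NULL) {
        fprintf(stderr, "dlopen: %s\n", dlerror());
        return 1;
    }

    /* Look up the hello() symbol and call it through a function pointer. */
    void (*hello)(const char *) = (void (*)(const char *))dlsym(handle, "hello");
    if (hello == NULL) {
        fprintf(stderr, "dlsym: %s\n", dlerror());
        dlclose(handle);
        return 1;
    }
    hello("world");

    dlclose(handle);
    return 0;
}

# gcc main_dl.c -o main_dl -ldl
# ./main_dl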

References

https://en.wikipedia.org/wiki/Static_library

Computer Systems: A Programmer's Perspective (CSAPP), the chapter on linking

# man ar
# man ranlib
In computer science, a static library or statically-linked library is a set of routines, external functions and variables which are resolved in a caller at compile-time and copied into a target application by a compiler, linker, or binder, producing an object file and a stand-alone executable.

Static library / statically linked library
All related object modules are packaged into a single file called a static library, which can be supplied as input to the linker.
When the linker builds an output executable, it copies only the object modules in the static library that are actually referenced by the application.
On Linux systems, static libraries are stored on disk in a special file format called an archive. An archive is a collection of concatenated relocatable object files, with a header describing the size and location of each member object file.
Archive file names are identified by the .a suffix.

ar

The GNU ar program creates, modifies, and extracts from archives.

An archive is a single file holding a collection of other files in a structure that makes it possible to retrieve the original individual files.

The original files' contents, mode (permissions), timestamp, owner, and group are preserved in the archive, and can be restored on extraction.
r : Insert the files member... into archive (with replacement).
c : Create the archive.
s : Add an index to the archive, or update it if it already exists.
t : Display a table listing the contents of archive, or those of the files listed in member... that are present in the archive.
v : This modifier requests the verbose version of an operation.
# ar rcs libxxx.a xxx1.o xxx2.o
Create a static library.

# ar t libxxx.a
List the files in the archive.

# ar tv libxxx.a
List the files in the archive with details.

ranlib

ranlib generates an index to the contents of an archive and stores it in the archive.
The index lists each symbol defined by a member of an archive that is a relocatable object file.

You may use nm -s to list this index.
# ranlib libxxx.a
Add an index to the archive.

nm

GNU nm lists the symbols from object files objfile....

Lists the symbol table.
# -s
When listing symbols from archive members, include the index: a mapping of which modules contain definitions for which names.
# nm libxxx.a
List the symbol table.

# nm -s libxxx.a
List the symbol table together with the archive index.

hello.h

#ifndef HELLO_H
#define HELLO_H

void hello(const char *name);

#endif

hello.c

#include <stdio.h>
void hello(const char *name)
{
    printf("hello,%s!\n",name);
}

main.c

#include "hello.h"
int main()
{
    hello("world");
    return 0;
}

Build and run

# gcc -c hello.c
# ar rcs libhello.a hello.o
# gcc main.c libhello.a -o hello
# ./hello

Notes

A static library is built from .o files, so the sources must first be compiled into .o object files.
Static library file names follow the convention lib<name>.a: the lib prefix, then the library name, then the .a extension.

References

https://www.cnblogs.com/zhenyuyaodidiao/p/9288430.html

https://openresty.org/download/agentzh-nginx-tutorials-zhcn.html
https://www.cnblogs.com/jackey2015/p/10375549.html
Nginx processes a request in 11 phases, executed in the following order: post-read, server-rewrite, find-config, rewrite, post-rewrite, preaccess, access, post-access, try-files, content, and log.

(Figure: the 11 Nginx request-processing phases)

Internal redirects

An "internal redirect" is when, while processing a request, the server jumps internally from one location to another.
Because the redirect is internal, the request being processed is still the same one; only the current location changes, so the same set of Nginx variable containers is reused.
The echo_exec and rewrite directives can trigger an internal redirect. Such a redirect automatically changes the current request's URI, re-matches it against the location blocks, and then re-runs the rewrite, access, content, and other phases. Because the jump is internal, it differs from the "external redirects" defined by HTTP via 301 and 302 responses: the URI shown in the user's browser address bar does not change.
The ngx_index module issues exactly this kind of internal redirect once it finds one of the files listed in the index directive, as if the user had directly requested the URI corresponding to that file.

post-read

The post-read phase, the first to run, starts immediately after Nginx has read and parsed the request headers.
Nginx modules can register handlers in this phase; for example, the standard ngx_realip module registers a handler here.

server-rewrite

After post-read comes the server-rewrite phase.
When ngx_rewrite configuration directives are written directly inside a server block, they generally run in the server-rewrite phase.

find-config

Immediately after server-rewrite comes the find-config phase.
This phase does not accept handlers registered by Nginx modules; instead, the Nginx core matches the current request against a location block here.
In other words, before this phase the request is not associated with any location block. Therefore, for the post-read and server-rewrite phases that run before find-config, only configuration directives in the server block and in outer scopes take effect.

rewrite

After find-config comes our old friend, the rewrite phase.

post-rewrite

After rewrite comes the post-rewrite phase.
Like find-config, this phase does not accept module handlers; the Nginx core performs here the internal redirect requested by the rewrite phase (if one was requested).

preaccess

After post-rewrite comes the preaccess phase.
The standard modules ngx_limit_req and ngx_limit_zone run in this phase; the former limits request rate, the latter limits concurrency.

access

After preaccess comes another old friend, the access phase.
The standard ngx_access module, the third-party ngx_auth_request module, and the access_by_lua directive of the third-party ngx_lua module all run in this phase.

post-access

After access comes the post-access phase.

try-files

Right after post-access comes the try-files phase.
This phase exists solely to implement the standard try_files directive and does not accept module handlers.

content

The content phase is the most important of all the request-processing phases, because the directives that run in it are generally responsible for generating the "content" and emitting the HTTP response.

log

The log phase runs last; this is where the request is logged (for example, the standard access_log handling runs in this phase).

References

https://en.wikipedia.org/wiki/Skip_list

https://www.cnblogs.com/xuqiang/archive/2011/05/22/2053516.html
https://www.iteye.com/blog/dsqiu-1705530
In computer science, a skip list is a data structure that allows O(log n) search complexity as well as O(log n) insertion complexity within an ordered sequence of n elements.

A skip list is built in layers. The bottom layer is an ordinary ordered linked list. Each higher layer acts as an "express lane" for the lists below, where an element in layer i appears in layer i+1 with some fixed probability p (two commonly used values for p are 1/2 or 1/4).

(Figure: skip list structure)

/*
 * Skip list
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_LEVEL 10 // maximum number of levels

struct node
{
    int key;
    int value;
    struct node *forward[];
};
struct skiplist
{
    int level;
    struct node *header;
};

struct node *createnode(int level, int key, int value);
struct skiplist *create();
void insert(struct skiplist *sl, int key, int value);
void del(struct skiplist *sl, int key);
void display(struct skiplist *sl);
int randomLevel();

int main()
{
    int i;
    struct skiplist *sl=create();

    for(i = 0; i < 20; i++)
    {
        insert(sl,i,i*2);
    }
    display(sl);
    
    del(sl,4);
    display(sl);

    return 0;
}

// Create a node with room for `level` forward pointers
struct node *createnode(int level, int key, int value)
{
    struct node *slnode = (struct node *)malloc(sizeof(struct node)+level*sizeof(struct node *));
    slnode->key = key;
    slnode->value = value;

    return slnode;
}

struct skiplist *create()
{
    int i;
    struct skiplist *sl = (struct skiplist *)malloc(sizeof(struct skiplist));

    sl->level = 1;
    sl->header = createnode(MAX_LEVEL, -1, -1);

    for(i = 0; i < MAX_LEVEL; i++)
    {
        sl->header->forward[i] = NULL;
    }

    return sl;
}

void insert(struct skiplist *sl, int key, int value)
{
    struct node *update[MAX_LEVEL];
    struct node *x;
    int i, level;

    x = sl->header;
    for(i = sl->level-1; i >= 0; i--)
    {
        while(x->forward[i] && x->forward[i]->key < key)
        {
            x = x->forward[i];
        }
        update[i] = x;
    }

    // duplicate keys are not allowed
    if(x->forward[0] && x->forward[0]->key == key)
    {
        return ;
    }
    
    level = randomLevel();
    if(level > sl->level)
    {
        for(i = sl->level; i < level; i++)
        {
            update[i] = sl->header;
        }
        sl->level = level;
    }
    
    x = createnode(level,key,value);
    for(i = 0; i < level; i++)
    {
        x->forward[i] = update[i]->forward[i];
        update[i]->forward[i] = x;
    }
}

void del(struct skiplist *sl, int key)
{
    struct node *update[MAX_LEVEL];
    struct node *x;
    int i;

    x = sl->header;
    for(i = sl->level-1; i >= 0; i--)
    {
        while(x->forward[i] && x->forward[i]->key < key)
        {
            x = x->forward[i];
        }
        update[i] = x;
    }

    x = x->forward[0];
    if(x && x->key == key)
    {
        for(i = 0; i < sl->level; i++)
        {
            if(update[i]->forward[i] == x)
            {
                update[i]->forward[i] = x->forward[i];
            }
        }
        free(x);

        while(sl->level > 1 && sl->header->forward[sl->level-1] == NULL)
        {
            sl->level--;
        }
    }
}

void display(struct skiplist *sl)
{
    struct node *x;
    int i;
    
    for(i = sl->level-1; i >= 0; i--)
    {
        x = sl->header;
        while(x->forward[i])
        {
            printf("%d -> ",x->value);
            x = x->forward[i];
        }
        printf("%d\n",x->value);
    }
    printf("\n");
}

// Randomly choose a level for a new node (p = 1/2)
int randomLevel()
{
    int k = 1;

    while(rand()%2) k++;
    k = (k < MAX_LEVEL) ? k : MAX_LEVEL;

    return k;
}

References

https://en.wikipedia.org/wiki/LZ77_and_LZ78
https://en.wikipedia.org/wiki/DEFLATE
https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm
https://github.com/lzfse/lzfse

https://en.wikipedia.org/wiki/Zlib
https://en.wikipedia.org/wiki/Snappy_(compression)
https://en.wikipedia.org/wiki/Zip_(file_format)
https://en.wikipedia.org/wiki/Bzip2

https://www.7-zip.org/

LZ77

LZ77 and LZ78 are the two lossless data compression algorithms published in papers by Abraham Lempel and Jacob Ziv in 1977 and 1978.
These two algorithms form the basis for many variations including LZW, LZSS, LZMA and others.

DEFLATE

In computing, Deflate is a lossless data compression algorithm and associated file format that uses a combination of the LZ77 algorithm and Huffman coding.

LZMA

The Lempel–Ziv–Markov chain algorithm (LZMA) is an algorithm used to perform lossless data compression.
This algorithm uses a dictionary compression scheme somewhat similar to the LZ77 algorithm published by Abraham Lempel and Jacob Ziv in 1977 and features a high compression ratio (generally higher than bzip2).

LZFSE

LZFSE is a Lempel-Ziv style data compression algorithm using Finite State Entropy coding. It targets similar compression rates at higher compression and decompression speed compared to deflate using zlib.

zlib

zlib is a software library used for data compression. zlib was written by Jean-loup Gailly and Mark Adler and is an abstraction of the DEFLATE compression algorithm used in their gzip file compression program.
As of September 2018, zlib only supports one algorithm, called DEFLATE, that is a variation of LZ77.

zlib is an open-source library and an implementation of the DEFLATE algorithm.
The library supports three data formats: ZLIB, DEFLATE, and GZIP, specified by RFC 1950, RFC 1951, and RFC 1952 respectively.
The corresponding PHP functions are gzcompress, gzdeflate, and gzencode.

gzip

gzip is a file format and a software application used for file compression and decompression.
The program was created by Jean-loup Gailly and Mark Adler as a free software replacement for the compress program used in early Unix systems.

gzip is based on the DEFLATE algorithm, which is a combination of LZ77 and Huffman coding.

gzip is a file format based on the DEFLATE algorithm.

Snappy

Snappy is a fast data compression and decompression library written in C++ by Google based on ideas from LZ77 and open-sourced in 2011.
It does not aim for maximum compression, or compatibility with any other compression library; instead, it aims for very high speeds and reasonable compression.

Snappy does not aim for the highest compression ratio; it aims for very fast compression with a reasonable ratio.

Zip

ZIP is an archive file format that supports lossless data compression. A ZIP file may contain one or more files or directories that may have been compressed.
The ZIP file format permits a number of compression algorithms, though DEFLATE is the most common.

Zip is a compressed archive file format.

bzip2

bzip2 is a free and open-source file compression program that uses the Burrows–Wheeler algorithm.
bzip2 compresses most files more effectively than the older LZW (.Z) and Deflate (.zip and .gz) compression algorithms, but is considerably slower.
LZMA is generally more space-efficient than bzip2 at the expense of even slower compression speed, while having much faster decompression.

7-Zip

7-Zip works in Windows.

High compression ratio in 7z format with LZMA and LZMA2 compression.

References

https://en.wikipedia.org/wiki/Transport_Layer_Security#Protocol_details
https://en.wikipedia.org/wiki/Cipher_suite

https://www.cnblogs.com/thammer/p/7083660.html
https://blog.csdn.net/mrpre/article/details/77867439

# man ciphers
The TLS protocol comprises two layers: the TLS record and the TLS handshake protocols.
TLS traffic is roughly divided into two layers: TLS records and the TLS handshake.

TLS record

Content Type: Handshake (22)
Version: TLS 1.0 (0x0301)
Length: 512
# Content type
This field identifies the Record Layer Protocol Type contained in this Record.

Hex    Dec    Type
0x14    20    ChangeCipherSpec
0x15    21    Alert
0x16    22    Handshake
0x17    23    Application
0x18    24    Heartbeat

Handshake protocol

# Message type
This field identifies the handshake message type.

Code    Description
0    HelloRequest
1    ClientHello
2    ServerHello
4    NewSessionTicket
8    EncryptedExtensions (TLS 1.3 only)
11    Certificate
12    ServerKeyExchange
13    CertificateRequest
14    ServerHelloDone
15    CertificateVerify
16    ClientKeyExchange
20    Finished

Alert protocol

This record should normally not be sent during normal handshaking or application exchanges. However, this message can be sent at any time during the handshake and up to the closure of the session. If this is used to signal a fatal error, the session will be closed immediately after sending this record, so this record is used to give a reason for this closure.

Handshake process

1. A client sends a ClientHello message specifying the highest TLS protocol version it supports, a random number, a list of suggested cipher suites and suggested compression methods.
2. The server responds with a ServerHello message, containing the chosen protocol version, a random number, CipherSuite and compression method from the choices offered by the client.
3. The server sends its Certificate message (depending on the selected cipher suite, this may be omitted by the server).
4. The server sends its ServerKeyExchange message (depending on the selected cipher suite, this may be omitted by the server). This message is sent for all DHE and DH_anon ciphersuites.
5. The server sends a ServerHelloDone message, indicating it is done with handshake negotiation.
6. The client responds with a ClientKeyExchange message, which may contain a PreMasterSecret, public key, or nothing. (Again, this depends on the selected cipher.) This PreMasterSecret is encrypted using the public key of the server certificate.
7. The client now sends a ChangeCipherSpec record, essentially telling the server, "Everything I tell you from now on will be authenticated (and encrypted if encryption parameters were present in the server certificate)."
8. Finally, the client sends an authenticated and encrypted Finished message, containing a hash and MAC over the previous handshake messages. The server will attempt to decrypt the client's Finished message and verify the hash and MAC.
9. Finally, the server sends a ChangeCipherSpec, telling the client, "Everything I tell you from now on will be authenticated (and encrypted, if encryption was negotiated)."
10. The server sends its authenticated and encrypted Finished message. The client performs the same decryption and verification procedure as the server did in the previous step.
11. Application phase: at this point, the "handshake" is complete and the application protocol is enabled, with content type of 23. Application messages exchanged between client and server will also be authenticated and optionally encrypted exactly like in their Finished message.

(Figure: TLS handshake message flow)

ClientHello

(Figure: ClientHello)

ServerHello

(Figure: ServerHello)

ServerKeyExchange

(Figure: ServerKeyExchange)

ClientKeyExchange

(Figure: ClientKeyExchange)

Cipher suites

A cipher suite is a set of algorithms that help secure a network connection that uses Transport Layer Security (TLS) or its now-deprecated predecessor Secure Socket Layer (SSL).
The set of algorithms that cipher suites usually contain include: a key exchange algorithm, a bulk encryption algorithm, and a message authentication code (MAC) algorithm.

The key exchange algorithm is used to exchange a key between two devices. This key is used to encrypt and decrypt the messages being sent between two machines.
The bulk encryption algorithm is used to encrypt the data being sent.
The MAC algorithm provides data integrity checks to ensure that the data sent does not change in transit.
In addition, cipher suites can include signatures and an authentication algorithm to help authenticate the server and or client.
Naming convention, for example TLS_RSA_WITH_3DES_EDE_CBC_SHA:
1. TLS defines the protocol that this cipher suite is for;
2. RSA indicates the key exchange algorithm being used. The key exchange algorithm is used to determine if and how the client and server will authenticate during the handshake.
3. 3DES_EDE_CBC indicates the block cipher being used to encrypt the message stream, together with the block cipher mode of operation.
4. SHA indicates the message authentication algorithm which is used to authenticate a message.
# openssl ciphers -v
List all cipher suites in detail.

References

https://en.wikipedia.org/wiki/X.509
https://segmentfault.com/q/1010000007085150

# man x509
In cryptography, X.509 is a standard that defines the format of public key certificates.

X.509 is a digital-certificate format standard defined by the International Telecommunication Union (ITU-T).

Certificate filename extensions

There are several commonly used filename extensions for X.509 certificates. Unfortunately, some of these extensions are also used for other data such as private keys.

.pem – Base64 encoded DER certificate, enclosed between "-----BEGIN CERTIFICATE-----" and "-----END CERTIFICATE-----".
.cer, .crt, .der – usually in binary DER form.

Certificate structure

The structure foreseen by the standards is expressed in a formal language, Abstract Syntax Notation One (ASN.1).

The structure of an X.509 v3 digital certificate is as follows:

Certificate
    Version Number
    Serial Number                               // unique identifier of the certificate
    Signature Algorithm ID
    Issuer Name                                 // the issuing CA
    Validity period
        Not Before
        Not After
    Subject name                                // the certificate holder
    Subject Public Key Info
        Public Key Algorithm
        Subject Public Key
    Issuer Unique Identifier (optional)
    Subject Unique Identifier (optional)
    Extensions (optional)
        ...
    Certificate Signature Algorithm
    Certificate Signature
The certificate signature guarantees the integrity of the certificate.

x509

The x509 command is a multi purpose certificate utility.

It can be used to display certificate information, convert certificates to various forms, sign certificate requests like a "mini CA" or edit certificate trust settings.
# -signkey filename
this option causes the input file to be self signed using the supplied private key.

# -req
by default a certificate is expected on input. With this option a certificate request is expected instead.

# -set_serial n
specifies the serial number to use.

# -CA filename
specifies the CA certificate to be used for signing.

# -CAkey filename
sets the CA private key to sign a certificate with.

# -CAcreateserial
with this option the CA serial number file is created if it does not exist.

# -extfile filename
file containing certificate extensions to use. If not specified then no extensions are added to the certificate.

# -extensions section
the section to add certificate extensions from.
# openssl genrsa -out server.key 2048 // generate a private key
# openssl req -new -key server.key -out server.csr // generate a certificate signing request
# openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt // generate a self-signed certificate
# openssl x509 -in server.crt -text -noout // inspect the certificate

Self-signed certificate

# openssl genrsa -out server.key 2048
# openssl req -new -key server.key -out server.csr
# openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt

CA-signed certificate

// generate the CA certificate
# openssl genrsa -out ca.key 2048
# openssl req -new -key ca.key -x509 -days 365 -out ca.crt

// generate a server certificate signed by the CA
# openssl genrsa -out server.key 2048
# openssl req -new -key server.key -out server.csr
# openssl x509 -req -days 365 -in server.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out server.crt

References

https://en.wikipedia.org/wiki/Certificate_signing_request
https://www.sslshopper.com/what-is-a-csr-certificate-signing-request.html
https://www.openssl.org/docs/manmaster/man1/req.html

# man req
In Public Key Infrastructure (PKI) systems, a Certificate Signing Request is a message sent from an applicant to a Certificate Authority in order to apply for a digital identity certificate.

It usually contains the public key for which the certificate should be issued, identifying information (such as a domain name) and integrity protection (e.g., a digital signature).

The most common format for CSRs is the PKCS #10 specification.
A CSR or Certificate Signing request is a block of encoded text that is given to a Certificate Authority when applying for an SSL Certificate.
It is usually generated on the server where the certificate will be installed and contains information that will be included in the certificate such as the organization name, common name (domain name), locality, and country.
It also contains the public key that will be included in the certificate. A private key is usually created at the same time that you create the CSR, making a key pair.

A certificate authority will use a CSR to create your SSL certificate, but it does not need your private key. You need to keep your private key secret.
The certificate created with a particular CSR will only work with the private key that was generated with it. So if you lose the private key, the certificate will no longer work.
Fields prompted for when generating a CSR:
Country Name (2 letter code) [XX]
State or Province Name
Locality Name (eg, city) [Default City]
Organization Name (eg, company) [Default Company Ltd]
Organizational Unit Name (eg, section)
Common Name (eg, your name or your server's hostname)
Email Address

req

The req command primarily creates and processes certificate requests in PKCS#10 format.

It can additionally create self signed certificates for use as root CAs for example.
# -new
this option generates a new certificate request.

# -key filename
This specifies the file to read the private key from.

# -in filename
This specifies the input filename to read a request from or standard input if this option is not specified.
A request is only read if the creation options (-new and -newkey) are not specified.

# -out filename
This specifies the output filename to write to or standard output by default.

# -text
prints out the certificate request in text form.

# -noout
this option prevents output of the encoded version of the request.

# -verify
verifies the signature on the request.

# -x509
this option outputs a self signed certificate instead of a certificate request.
This is typically used to generate a test certificate or a self signed root CA.

# -days n
when the -x509 option is being used this specifies the number of days to certify the certificate for.
The default is 30 days.
# openssl genrsa -out server.key 2048 // generate a private key
# openssl req -new -key server.key -out server.csr // generate a certificate signing request
# openssl req -in server.csr -text -noout // inspect the certificate signing request
# openssl req -verify -in server.csr -noout // verify the signature on the request
# openssl req -new -key server.key -x509 -days 365 -out ca.crt // generate a CA certificate
Note: the last command outputs a certificate, not a signing request.

References

https://en.wikipedia.org/wiki/Public-key_cryptography
https://en.wikipedia.org/wiki/Digital_signature

# man genrsa
# man rsa
# man pkcs8
# man dgst
Public-key encryption
Public-key cryptography, or asymmetric cryptography, is a cryptographic system that uses pairs of keys: public keys which may be disseminated widely, and private keys which are known only to the owner.
This accomplishes two functions: authentication, which is when the public key is used to verify that a holder of the paired private key sent the message, and encryption, whereby only the holder of the paired private key can decrypt the message encrypted with the public key.

RSA

RSA (with PKCS#1 v1.5 padding) imposes the following limits:
the plaintext length must not exceed the key length minus 11 bytes;
the ciphertext length is exactly the key length.

For a 1024-bit (128-byte) key:
the plaintext can be at most 117 bytes;
the ciphertext is exactly 128 bytes.

genrsa

The genrsa command generates an RSA private key.

rsa

The rsa command processes RSA keys. They can be converted between various forms and their components printed out.

pkcs8

The pkcs8 command processes private keys in PKCS#8 format.
# openssl genrsa -out rsa_private_key.pem 1024 // generate an RSA private key
# openssl rsa -in rsa_private_key.pem -pubout -out rsa_public_key.pem // derive the RSA public key
# openssl pkcs8 -topk8 -in rsa_private_key.pem -nocrypt -out pkcs8_private_key.pem // convert the private key to PKCS#8 format
# openssl rsa -in rsa_private_key.pem -noout -text // show private key details
# openssl rsa -in rsa_public_key.pem -pubin -noout -text // show public key details

Public key encryption

Public key encryption, in which a message is encrypted with a recipient's public key. The message cannot be decrypted by anyone who does not possess the matching private key.
Encrypt with the public key, decrypt with the private key.

# openssl rsautl -encrypt -in test.txt -out test.enc -inkey rsa_public_key.pem -pubin // encrypt with the public key
# openssl rsautl -decrypt -in test.enc -out test.dec -inkey rsa_private_key.pem // decrypt with the private key

Digital signature

A digital signature is a mathematical scheme for verifying the authenticity of digital messages or documents.

A valid digital signature gives a recipient very strong reason to believe that the message was created by a known sender (authentication), that the sender cannot deny having sent the message (non-repudiation), and that the message was not altered in transit (integrity).
Sign with the private key, verify with the public key.

# openssl rsautl -sign -in test.txt -inkey rsa_private_key.pem -out test.sign // sign with the private key
# openssl rsautl -verify -in test.sign -inkey rsa_public_key.pem -pubin // verify with the public key
Hash-then-sign
The signature scheme is applied to a hash of the message rather than to the message itself.

# openssl dgst -sign rsa_private_key.pem -sha1 -out test.sign test.txt
# openssl dgst -verify rsa_public_key.pem -sha1 -signature test.sign test.txt

References

https://en.wikipedia.org/wiki/Symmetric-key_algorithm
https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation

# man enc
Symmetric-key encryption
Symmetric-key algorithms are algorithms for cryptography that use the same cryptographic keys for both encryption of plaintext and decryption of ciphertext.

Symmetric-key encryption can use either stream ciphers or block ciphers.

Common symmetric algorithms

DES, AES

Modes of operation

Electronic Codebook (ECB)

The simplest of the encryption modes is the Electronic Codebook (ECB) mode.
The message is divided into blocks, and each block is encrypted separately.

Cipher Block Chaining (CBC)

In CBC mode, each block of plaintext is XORed with the previous ciphertext block before being encrypted.
To make each message unique, an initialization vector must be used in the first block.

CBC has been the most commonly used mode of operation. Its main drawbacks are that encryption is sequential (i.e., it cannot be parallelized), and that the message must be padded to a multiple of the cipher block size.
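To make the chaining visible, here is a toy C sketch of the CBC structure Ci = E(Pi XOR C(i-1)). The "block cipher" is a trivial XOR with a key byte, which is of course not secure; the only point is that two identical plaintext blocks produce different ciphertext blocks, unlike ECB.

#include <stdio.h>
#include <string.h>

#define BLOCK 8  /* toy block size in bytes */

/* Toy "block cipher": XOR every byte with a key byte (NOT a real cipher). */
static void toy_encrypt_block(unsigned char *block, unsigned char key)
{
    for (int i = 0; i < BLOCK; i++) block[i] ^= key;
}

int main(void)
{
    unsigned char plaintext[] = "ABCDEFGHABCDEFGH";  /* two identical blocks */
    unsigned char iv[BLOCK] = {1, 2, 3, 4, 5, 6, 7, 8};
    unsigned char prev[BLOCK], block[BLOCK];
    unsigned char key = 0x5A;

    memcpy(prev, iv, BLOCK);                        /* C0 = IV */
    for (int b = 0; b < 2; b++) {
        memcpy(block, plaintext + b * BLOCK, BLOCK);
        for (int i = 0; i < BLOCK; i++)             /* Pi XOR C(i-1) */
            block[i] ^= prev[i];
        toy_encrypt_block(block, key);              /* Ci = E(Pi XOR C(i-1)) */
        memcpy(prev, block, BLOCK);

        printf("ciphertext block %d: ", b);
        for (int i = 0; i < BLOCK; i++) printf("%02x", block[i]);
        printf("\n");
    }
    return 0;
}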

openssl

# openssl des3 -salt -in file.txt -out file.des3
# openssl des3 -d -salt -in file.des3 -out file.txt

# openssl aes-256-cbc -in file.txt -out file.aes

PHP

# openssl_get_cipher_methods
# openssl_encrypt
# openssl_decrypt

Key generation

The key you use should be as random as possible; it must not be an ordinary text string, not even one run through a hash function.
To generate a suitable key, use the create_key() function shown below:

// $key will be assigned a 16-byte (128-bit) random key
$key = create_key(16);
$key = hex2bin($key);
function create_key($length)
{
    if (function_exists('random_bytes'))
    {
        try
        {
            return random_bytes((int) $length);
        }
        catch (Exception $e)
        {
            log_message('error', $e->getMessage());
            return FALSE;
        }
    }

    $is_secure = NULL;
    $key = openssl_random_pseudo_bytes($length, $is_secure);
    return ($is_secure === TRUE) ? $key : FALSE;
}

Encryption and decryption

$data = 'hello,world!';
$method = 'aes-128-cbc';
$key = create_key(16);
$iv = ($iv_size = openssl_cipher_iv_length($method)) ? create_key($iv_size) : '';
$cipherText = openssl_encrypt($data,$method,$key,1,$iv);
$plainText = openssl_decrypt($cipherText,$method,$key,1,$iv);
echo base64_encode($cipherText);echo "<br />";echo $plainText;

References

https://www.ietf.org/rfc/rfc793
https://en.wikipedia.org/wiki/Transmission_Control_Protocol
https://en.wikipedia.org/wiki/Maximum_segment_lifetime
https://en.wikipedia.org/wiki/Silly_window_syndrome
https://en.wikipedia.org/wiki/Nagle%27s_algorithm
https://en.wikipedia.org/wiki/Retransmission_(data_networks)
https://en.wikipedia.org/wiki/TCP_congestion_control
https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment

https://www.cnblogs.com/wuchanming/p/4028341.html

# sysctl -a | grep ipv4.tcp
# sysctl -a | grep core
# man tcp
TCP provides reliable, ordered, and error-checked delivery of a stream of octets between applications running on hosts communicating via an IP network.

Summary

Sequence numbers solve the problem of packet ordering.
Acknowledgment numbers solve the problem of packet loss.
The sliding window implements flow control.

Connection establishment

To establish a connection, TCP uses a three-way handshake.
The three-way handshake.

Connection termination

(Figure: TCP connection termination, four-way handshake)

The connection termination phase uses a four-way handshake, with each side of the connection terminating independently. 

A connection can be "half-open", in which case one side has terminated its end, but the other has not. The side that has terminated can no longer send any data into the connection, but the other side can. The terminating side should continue reading the data until the other side terminates as well.
Closing operations: close, shutdown.

https://blog.csdn.net/zhangxiao93/article/details/52078784

MSL

Maximum Segment Lifetime

Maximum segment lifetime is the time a TCP segment can exist in the internetwork system. It is arbitrarily defined to be 2 minutes long.
The Maximum Segment Lifetime value is used to determine the TIME_WAIT interval (2*MSL).

MSS

Maximum Segment Size

The maximum segment size (MSS) is the largest amount of data, specified in bytes, that TCP is willing to receive in a single segment.

ISN

Initial Sequence Number

The first sequence number used on a connection.
The generator is bound to a 32 bit clock whose low order bit is incremented roughly every 4 microseconds. Thus, the ISN cycles approximately every 4.55 hours.
Since we assume that segments will stay in the network no more than the Maximum Segment Lifetime (MSL) and that the MSL is less than 4.55 hours we can reasonably assume that ISN's will be unique.

RTT

Round-Trip Time

Measure the elapsed time between sending a data octet with a particular sequence number and receiving an acknowledgment that covers that sequence number. This measured elapsed time is the Round Trip Time (RTT).

MTU

Maximum Transmission Unit

A network-layer concept (e.g. the IP protocol).

In computer networking, the maximum transmission unit (MTU) is the size of the largest network layer protocol data unit that can be communicated in a single network transaction.

Larger MTU is associated with reduced overhead. Smaller values can reduce network delay. In many cases MTU is dependent on underlying network capabilities and must be or should be adjusted manually or automatically so as not to exceed these capabilities.

A larger MTU means higher efficiency (less per-packet overhead) but larger transmission delay.

Flags

SYN: Synchronize sequence numbers. Only the first packet sent from each end should have this flag set.
ACK: indicates that the Acknowledgment field is significant. All packets after the initial SYN packet sent by the client should have this flag set.
PSH: Push function. Asks to push the buffered data to the receiving application.
FIN: No more data from sender.
RST: Reset the connection.

States

FIN-WAIT-1: represents waiting for an acknowledgment of the connection termination request previously sent.

CLOSE-WAIT: represents waiting for a connection termination request from the local user.

FIN-WAIT-2: represents waiting for a connection termination request from the remote TCP.

LAST-ACK: represents waiting for an acknowledgment of the connection termination request previously sent to the remote TCP.

TIME-WAIT: represents waiting for enough time to pass to be sure the remote TCP received the acknowledgment of its connection termination request.

Key features

There are a few key features that set TCP apart from User Datagram Protocol:
1. Ordered data transfer: the destination host rearranges according to sequence number.
2. Retransmission of lost packets: any cumulative stream not acknowledged is retransmitted.
3. Flow control: limits the rate a sender transfers data to guarantee reliable delivery.
4. Congestion control

1. Ordering
2. Retransmission
3. Flow control
4. Congestion control

Timeout-based retransmission

Retransmission based on a timer:
Whenever a packet is sent, the sender sets a timer that is a conservative estimate of when that packet will be acked. If the sender does not receive an ack by then, it transmits that packet again.
The timer is reset every time the sender receives an acknowledgement.
Retransmission timeout (RTO):
Because of the variability of the networks that compose an internetwork system and the wide range of uses of TCP connections the retransmission timeout must be dynamically determined.

Algorithm: see RFC 793.

Fast retransmit

Retransmission based on duplicate ACKs:
If a single packet (say packet 100) in a stream is lost, then the receiver cannot acknowledge packets above 100 because it uses cumulative ACKs. Hence the receiver acknowledges packet 99 again on the receipt of another data packet.
This duplicate acknowledgement is used as a signal for packet loss. That is, if the sender receives three duplicate acknowledgements, it retransmits the last unacknowledged packet.
A threshold of three is used because the network may reorder packets causing duplicate acknowledgements.

Acknowledgment mechanisms

Selective acknowledgment (SACK):
Relying purely on the cumulative acknowledgment scheme employed by the original TCP protocol can lead to inefficiencies when packets are lost.
For example, suppose bytes with sequence number 1,000 to 10,999 are sent in 10 different TCP segments of equal size, and the second segment (sequence numbers 2,000 to 2,999) is lost during transmission. In a pure cumulative acknowledgment protocol, the receiver can only send a cumulative ACK value of 2,000 (the sequence number immediately following the last sequence number of the received data) and cannot say that it received bytes 3,000 to 10,999 successfully. Thus the sender may then have to resend all data starting with sequence number 2,000.

To alleviate this issue TCP employs the selective acknowledgment (SACK) option, defined in 1996 in RFC 2018, which allows the receiver to acknowledge discontinuous blocks of packets which were received correctly.
Cumulative acknowledgment
TCP delayed acknowledgment
TCP delayed acknowledgment is a technique used by some implementations of the Transmission Control Protocol in an effort to improve network performance.
In essence, several ACK responses may be combined together into a single response, reducing protocol overhead. However, in some circumstances, the technique can reduce application performance.

Flow control

TCP uses an end-to-end flow control protocol to avoid having the sender send data too fast for the TCP receiver to receive and process it reliably.

TCP uses a sliding window flow control protocol. In each TCP segment, the receiver specifies in the receive window field the amount of additionally received data (in bytes) that it is willing to buffer for the connection. The sending host can send only up to that amount of data before it must wait for an acknowledgment and window update from the receiving host.

When a receiver advertises a window size of 0, the sender stops sending data and starts the persist timer. The persist timer is used to protect TCP from a deadlock situation that could arise if a subsequent window size update from the receiver is lost, and the sender cannot send more data until receiving a new window size update from the receiver. When the persist timer expires, the TCP sender attempts recovery by sending a small packet so that the receiver responds by sending another acknowledgement containing the new window size.

If a receiver is processing incoming data in small increments, it may repeatedly advertise a small receive window. This is referred to as the silly window syndrome.
TCP uses an end-to-end flow control protocol (the sliding window protocol) to keep the sender from transmitting faster than the receiver can process.
The TCP header carries a window field in which the receiver tells the sender how much buffer space it still has available; the sender limits what it sends based on that value.
When the receive window drops to 0, the sender stops sending data and starts the persist timer. The timer guards against deadlock: if the receiver's window update is lost, the sender would otherwise never be able to send again. When the persist timer expires, the sender probes the receiver with a small packet, and the receiver responds with an acknowledgment carrying the current window size.
If the receiver processes incoming data only in small increments and therefore keeps advertising tiny windows, the result is the "silly window syndrome".
Sliding window protocol: the send buffer is divided into data that is
  sent and acknowledged
  sent but not yet acknowledged
  not yet sent
  not yet sent and not permitted by the peer's window
Silly window syndrome
A serious problem can arise in the sliding window operation when the sending application program creates data slowly, the receiving application program consumes data slowly, or both. If a server with this problem is unable to process all incoming data, it requests that its clients reduce the amount of data they send at a time. If the server continues to be unable to process all incoming data, the window becomes smaller and smaller, sometimes to the point that the data transmitted is smaller than the packet header, making data transmission extremely inefficient.

When the silly window syndrome is created by the sender, Nagle's algorithm is used. Nagle's solution requires that the sender send the first segment even if it is a small one, then that it wait until an ACK is received or a maximum sized segment (MSS) is accumulated.
When the silly window syndrome is created by the receiver, David D Clark's solution is used.[citation needed] Clark's solution closes the window until another segment of maximum segment size (MSS) can be received or the buffer is half empty.

If the silly window syndrome is caused by the sender, Nagle's algorithm is used: send the first small segment, then wait until an ACK arrives or a full MSS of data has accumulated before sending again.
If it is caused by the receiver, David D. Clark's solution is used: keep the window closed until a full MSS segment can be received or the buffer is half empty, and only then reopen it.
Nagle's algorithm
Nagle's algorithm is a means of improving the efficiency of TCP/IP networks by reducing the number of packets that need to be sent over the network.

The RFC describes what he called the "small-packet problem", where an application repeatedly emits data in small chunks, frequently only 1 byte in size. Since TCP packets have a 40-byte header (20 bytes for TCP, 20 bytes for IPv4), this results in a 41-byte packet for 1 byte of useful information, a huge overhead.

Nagle's algorithm works by combining a number of small outgoing messages and sending them all at once. Specifically, as long as there is a sent packet for which the sender has received no acknowledgment, the sender should keep buffering its output until it has a full packet's worth of output, thus allowing output to be sent all at once.

Nagle's algorithm essentially accumulates data and sends it in one go: as long as one sent packet remains unacknowledged, the sender keeps buffering its output until a full MSS segment is available to send.
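Applications that care about latency for small writes often disable Nagle's algorithm on a per-socket basis. A minimal sketch using the standard TCP_NODELAY socket option (Linux/POSIX):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) { perror("socket"); return 1; }

    /* Disable Nagle's algorithm on this socket: small writes are sent
     * immediately instead of being coalesced while an ACK is outstanding. */
    int one = 1;
    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) < 0)
        perror("setsockopt(TCP_NODELAY)");

    return 0;
}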

Congestion control

The final main aspect of TCP is congestion control. TCP uses a number of mechanisms to achieve high performance and avoid congestion collapse, where network performance can fall by several orders of magnitude. These mechanisms control the rate of data entering the network, keeping the data flow below a rate that would trigger collapse.

Modern implementations of TCP contain four intertwined algorithms: slow-start, congestion avoidance, fast retransmit, and fast recovery.

(Figures: congestion window evolution during slow start, congestion avoidance, and fast recovery)

Slow start
Slow-start is used in conjunction with other algorithms to avoid sending more data than the network is capable of transmitting, that is, to avoid causing network congestion.

Slow start begins initially with a congestion window size (CWND) of 1, 2, 4 or 10 MSS. The value for the congestion window size will be increased by one with each acknowledgement (ACK) received, effectively doubling the window size each round-trip time.[c] The transmission rate will be increased by the slow-start algorithm until either a loss is detected, or the receiver's advertised window (rwnd) is the limiting factor, or ssthresh is reached.
Congestion avoidance
Once ssthresh is reached, TCP changes from slow-start algorithm to the linear growth (congestion avoidance) algorithm. At this point, the window is increased by 1 segment for each round-trip delay time (RTT).
Fast recovery
When a loss occurs, a fast retransmit is sent, half of the current CWND is saved as ssthresh and as new CWND, thus skipping slow start and going directly to the congestion avoidance algorithm. The overall algorithm here is called fast recovery.
Fast retransmit

Fast retransmit is an enhancement to TCP that reduces the time a sender waits before retransmitting a lost segment.
A TCP sender normally uses a simple timer to recognize lost segments. If an acknowledgement is not received for a particular segment within a specified time, the sender will assume the segment was lost in the network, and will retransmit the segment.

Duplicate acknowledgement is the basis for the fast retransmit mechanism. Duplicate acknowledgement occurs when the sender receives more than one acknowledgement with the same sequence number.

When a sender receives several duplicate acknowledgements, it can be reasonably confident that the segment with the next higher sequence number was dropped. A sender with fast retransmit will then retransmit this packet immediately without waiting for its timeout.
The Reno algorithm:
start with slow start; once ssthresh is reached, switch to congestion avoidance; when packet loss is detected, perform a fast retransmit and then continue with fast recovery.

# sysctl net.ipv4.tcp_available_congestion_control // congestion control algorithms available
# sysctl net.ipv4.tcp_allowed_congestion_control // congestion control algorithms allowed
# sysctl net.ipv4.tcp_congestion_control // default congestion control algorithm

The default congestion control algorithm is cubic.

Kernel parameters

net.ipv4.tcp_rmem
Receive buffer sizes (auto-tuned).
This is a vector of 3 integers: [min, default, max]. These parameters are used by TCP to regulate receive buffer  sizes.
TCP dynamically adjusts the size of the receive buffer from the defaults listed below, in the range of these  values, depending on memory available in the system.

net.ipv4.tcp_wmem
Send buffer sizes (auto-tuned).
This is a vector of 3 integers: [min, default, max]. These parameters are used by TCP to regulate send buffer sizes.
TCP dynamically adjusts the size of the send buffer from the default values listed below, in the range of these values, depending on memory available.

net.ipv4.tcp_keepalive_time
How long a connection must be idle before TCP starts sending keep-alive probes (default: 7200 seconds).
The number of seconds a connection needs to be idle before TCP begins sending out keep-alive probes.

net.ipv4.tcp_keepalive_intvl
Interval between keep-alive probes when a probe goes unanswered (default: 75 seconds).
The number of seconds between TCP keep-alive probes.

net.ipv4.tcp_keepalive_probes
Maximum number of keep-alive probes sent before the connection is declared dead (default: 9).
The maximum number of TCP keep-alive probes to send before giving up and killing the connection if no response is obtained from the other end.

net.ipv4.tcp_fin_timeout
For connections closed by the local end, how long (in seconds) the socket stays in FIN-WAIT-2.
This specifies how many seconds to wait for a final FIN packet before the socket is forcibly closed.

net.ipv4.tcp_max_tw_buckets
Maximum number of sockets kept in the TIME-WAIT state at the same time.

net.ipv4.tcp_max_syn_backlog
Maximum length of the SYN (half-open connection) queue.

net.ipv4.tcp_syncookies
When the SYN queue overflows, enable SYN cookies to defend against SYN flood (half-open connection) attacks.

net.ipv4.tcp_sack
Enable selective acknowledgment (SACK).

net.ipv4.tcp_window_scaling
Enable TCP window scaling.

net.ipv4.tcp_syn_retries
When actively opening a connection, how many times the kernel retries the SYN before giving up.
The maximum number of times initial SYNs for an active TCP connection attempt will be retransmitted.

net.ipv4.tcp_synack_retries
After receiving a SYN, how many times the kernel retransmits the SYN/ACK before giving up.
The maximum number of times a SYN/ACK segment for a passive TCP connection will be retransmitted.

net.ipv4.tcp_retries2
For an established connection, how many times a packet is retransmitted before giving up.
The maximum number of times a TCP packet is retransmitted in established state before giving up.

net.core.somaxconn
Maximum length of the listen queue for each port in the system.

net.core.optmem_max
Maximum buffer size allowed per socket.
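The keep-alive parameters above are system-wide defaults; an application can also override them for an individual socket. A minimal sketch using the standard Linux socket options:

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) { perror("socket"); return 1; }

    int on = 1, idle = 60, intvl = 10, cnt = 5;

    /* Enable keep-alive on this socket ... */
    setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
    /* ... and override tcp_keepalive_time / _intvl / _probes for it. */
    setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
    setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
    setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));

    printf("keep-alive: idle=%ds interval=%ds probes=%d\n", idle, intvl, cnt);
    return 0;
}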

local host = "192.168.16.2"
local port = 6379

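-- Determine the client IP, preferring the X-Real-IP / X-Forwarded-For headers over remote_addr.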
function getClientIp()
    local headers = ngx.req.get_headers()
    local ip = headers["X-Real-IP"]
    if ip == nil then
        ip = headers["X-Forwarded-For"]
    end
    if ip == "unknown" or ip == nil then
        ip = ngx.var.remote_addr
    end
    if ip == nil then
        ip = "0.0.0.0"
    end

    return ip
end

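-- Return whether the client IP is a member of the Redis set "ip_blacklist".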
function ipDeny()
    local ip = getClientIp()
    local redis = require "resty.redis"
    local red = redis:new()

    red:set_timeouts(1000, 1000, 1000) -- 1 sec

    local ok, err = red:connect(host, port)
    if not ok then
        ngx.say("failed to connect: ", err)
        return
    end

    isDeny, err = red:sismember("ip_blacklist", ip)

    return isDeny
end

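-- Increment and return the request counter stored in the Redis key "num".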
function countQueue()
    local redis = require "resty.redis"
    local red = redis:new()

    red:set_timeouts(1000, 1000, 1000) -- 1 sec

    local ok, err = red:connect(host, port)
    if not ok then
        ngx.say("failed to connect: ", err)
        return
    end

    count, err = red:incr("num")

    return count
end

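-- Read a rule file under /etc/nginx/conf.d and return its lines as a table.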
function read(filename)
    local path = "/etc/nginx/conf.d"
    local file = io.open(path..'/'..filename,"r")
    if file == nil then
        return
    end 

    t = {}
    for line in file:lines() do
        table.insert(t,line)
    end 
    file:close()

    return t
end

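-- Reject requests whose User-Agent matches any rule in the "user-agent" file (HTTP 403).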
function uaFilter()
    local rules = read("user-agent")
    local ua = ngx.var.http_user_agent

    if ua == nil then
        ngx.exit(403)
    end 

    for _, rule in pairs(rules) do
        if rule ~= "" and ngx.re.match(ua,rule,"isjo") then
            ngx.exit(403)
        end
    end

    return
end

References

https://en.wikipedia.org/wiki/C_file_input/output
https://www.cnblogs.com/wonderKK/archive/2012/06/13/2547740.html
https://blog.csdn.net/shadow7499158/article/details/38931241

Advanced Programming in the UNIX Environment (APUE), Chapter 5

# man stdio
Standard I/O is stream-based. Three streams are predefined for each process: standard input, standard output, and standard error.
#include <stdio.h>
FILE *stdin;
FILE *stdout;
FILE *stderr;

Standard I/O buffering

The goal of standard I/O buffering is to use as few read and write calls as possible.
Fully buffered: actual I/O takes place only when the standard I/O buffer is filled.
Line buffered: standard I/O performs I/O when a newline character is encountered on input or output.
Unbuffered: the standard I/O library does not buffer the characters.
ISO C requires the following buffering characteristics:
1. Standard input and standard output are fully buffered if and only if they do not refer to an interactive device.
2. Standard error is never fully buffered.

This, however, does not tell us whether standard input and standard output are unbuffered or line buffered when they refer to an interactive device, nor whether standard error is unbuffered or line buffered.
In practice:
Streams referring to files on disk are normally fully buffered by the standard I/O library.
When a stream refers to a terminal (as standard input and standard output usually do), it is line buffered.
Standard error is unbuffered.
#include <stdio.h>

void setbuf(FILE *stream, char *buf);
int setvbuf(FILE *stream, char *buf, int mode, size_t size);

Either of the two functions above can be used to change the buffering of a given stream.
setbuf turns buffering on or off: to enable buffering, buf must point to a buffer of length BUFSIZ (a constant defined in <stdio.h>), and the stream is then normally fully buffered; to disable buffering, set buf to NULL.
With setvbuf we can specify exactly which type of buffering we want, via the mode argument:
_IONBF  unbuffered
_IOLBF  line buffered
_IOFBF  fully buffered
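
A minimal sketch of the two calls (purely illustrative; setvbuf must be called after the stream is opened and before any other operation is performed on it):

#include <stdio.h>

int main(void)
{
    static char outbuf[BUFSIZ]; /* must stay valid until the stream is closed */

    /* fully buffer stdout with our own buffer (equivalent to setbuf(stdout, outbuf)) */
    setvbuf(stdout, outbuf, _IOFBF, sizeof(outbuf));

    /* make stderr explicitly unbuffered (it normally is already) */
    setvbuf(stderr, NULL, _IONBF, 0);

    printf("held in outbuf until it fills, fflush(stdout), or normal exit\n");
    fflush(stdout); /* force the buffered output out now */
    return 0;
}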

fclose

#include <stdio.h>

int fclose(FILE *fp);
Any buffered output data is flushed before the file is closed; any buffered input data is discarded.
When a process terminates normally (by calling exit directly or by returning from main), all standard I/O streams with unwritten buffered data are flushed and all open standard I/O streams are closed.

fread

#include <stdio.h>

size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);

fwrite

#include <stdio.h>

size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
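
A short example of the two prototypes above, writing an array of structs to a binary file and reading it back (the file name data.bin is arbitrary):

#include <stdio.h>

struct point { int x, y; };

int main(void)
{
    struct point out[3] = {{1,2},{3,4},{5,6}};
    struct point in[3];
    size_t n;

    FILE *fp = fopen("data.bin", "wb");
    if (fp == NULL) return 1;
    n = fwrite(out, sizeof(struct point), 3, fp); /* number of items written */
    fclose(fp);

    fp = fopen("data.bin", "rb");
    if (fp == NULL) return 1;
    n = fread(in, sizeof(struct point), 3, fp);   /* number of items read */
    fclose(fp);

    printf("read %zu items, last = (%d,%d)\n", n, in[2].x, in[2].y);
    return 0;
}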

fflush

#include <stdio.h>

int fflush(FILE *stream);

Force a write of any buffered data for the given stream.

fileno

#include <stdio.h>

int fileno(FILE *stream);

Return the file descriptor associated with the stream.

Formatted output

#include <stdio.h>

int printf(const char *format, ...);
int fprintf(FILE *stream, const char *format, ...);
int sprintf(char *str, const char *format, ...);
int snprintf(char *str, size_t size, const char *format, ...);
The functions in the printf() family produce output according to a format.

The function printf() writes output to stdout, the standard output stream;
The function fprintf() writes output to the given output stream;
The functions sprintf() and snprintf() write to the character string str.

The function snprintf() writes at most size bytes (including the terminating null byte ('\0')) to str.
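
A quick illustration of that truncation behaviour and of the return value (which is the length the fully formatted string would have had, not the number of bytes actually stored):

#include <stdio.h>

int main(void)
{
    char buf[8];

    /* only 7 characters plus the terminating '\0' fit into buf */
    int n = snprintf(buf, sizeof(buf), "value=%d", 123456);

    printf("buf=\"%s\", full length would be %d\n", buf, n); /* buf="value=1", 12 */
    return 0;
}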

Given a non-empty array containing only positive integers, find if the array can be partitioned into two subsets such that the sum of elements in both subsets is equal.

NOTE:
1. Each array element will not exceed 100.
2. The array size will not exceed 200.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int canPartition(int *nums, int numsSize);

int main()
{
    int nums[] = {1,5,11,5};
    int n = 4;
    int can = 0;

    can = canPartition(nums,n);
    printf("result is: %d\n",can);

    return 0;
}

int canPartition(int *nums, int numsSize)
{
    int i = 0, j = 0;
    int total = 0;
    int half = 0;
    int max = 0;
    int *sum; // sum[j] == 1 means some subset of nums sums to j

    for(i = 0; i < numsSize; i++)
    {
        total += nums[i];
        max = max > nums[i] ? max : nums[i];
    }

    if(total%2) return 0; // odd total: cannot be split into two equal-sum subsets
    if(max > total/2) return 0; // the largest element exceeds half the total: impossible

    half = total/2;
    sum = (int *)malloc((half+1)*sizeof(int));
    memset(sum,0,(half+1)*sizeof(int));

    sum[0] = 1; // a sum of 0 is always achievable (the empty subset)
    for(i = 0; i < numsSize; i++)
    {
        for(j = half; j >= nums[i]; j--)
        {
            if(sum[j-nums[i]])
            {
                sum[j] = 1;
                if(j == half)
                {
                    return 1;
                }
            }
        }
    }

    return 0;
}

References

https://leetcode.com/problems/edit-distance/
https://www.cnblogs.com/easonliu/p/3661537.html
https://blog.csdn.net/baodream/article/details/80417695
Given two words word1 and word2, find the minimum number of operations required to convert word1 to word2.

You have the following 3 operations permitted on a word:
1. Insert a character
2. Delete a character
3. Replace a character

Solution

Define d[i][j] as the minimum edit distance between the first i characters of word1 and the first j characters of word2. Then:
  d[i][0] = i;
  d[0][j] = j;
  d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost)
where cost = 0 if the i-th character of word1 equals the j-th character of word2, and cost = 1 otherwise.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int minDistance(char *s1, char *s2);
int min(int a, int b);

int main()
{
    char *s1 = "horse";
    char *s2 = "ros";

    int d = minDistance(s1,s2);

    printf("min edit distance is: %d\n", d);

    return 0;
}

int minDistance(char *s1, char *s2)
{
    int i,j;
    int cost = 0;
    int len1 = strlen(s1);
    int len2 = strlen(s2);
    int **d = (int **)malloc((len1+1)*sizeof(int *)); // array of row pointers
    d[0] = (int *)malloc((len1+1)*(len2+1)*sizeof(int)); // one contiguous block for the whole table

    for(i = 1; i < (len1+1); i++)
    {
        d[i] = d[i-1] + (len2+1);
    }
    memset(d[0],0,(len1+1)*(len2+1)*sizeof(int));

    if (len1 == 0) return len2;
    if (len2 == 0) return len1;

    for (i = 1; i <= len1; i++)
    {
        d[i][0] = i;
    }
    for (j = 1; j <= len2; j++)
    {
        d[0][j] = j;
    }
    for (i = 1; i <= len1; i++)
    {
        for (j = 1; j <= len2; j++)
        {
            if (s1[i-1] == s2[j-1]) cost = 0;
            else cost = 1;

            d[i][j] = min(d[i-1][j]+1, min(d[i][j-1]+1, d[i-1][j-1]+cost));
        }
    }

    return d[len1][len2];
}

int min(int a, int b)
{
    return a < b ? a : b;
}

You are climbing a stair case. It takes n steps to reach to the top.
Each time you can either climb 1 or 2 steps. In how many distinct ways can you climb to the top?

Solution

1. n = 1: there is only one way;
2. n = 2: there are two ways;
3. For n > 2, the count is (ways whose first move is 1 step) + (ways whose first move is 2 steps), i.e.:
                 f(n) = f(n-1) + f(n-2)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int climbStairs(int n, int *steps);

int main()
{
    int n = 10;
    int *steps = malloc(sizeof(int)*(n+1));
    memset(steps,0,sizeof(int)*(n+1));

    steps[n] = climbStairs(n,steps);
    printf("steps=%d\n",steps[n]);

    return 0;
}

int climbStairs(int n, int *steps)
{
    if(n == 1)
    {
        steps[n] = 1;
    }
    else if(n == 2)
    {
        steps[n] = 2;
    }
    else
    {
        if(steps[n-1] == 0)
        {
            steps[n-1] = climbStairs(n-1,steps);
        }
        if(steps[n-2] == 0)
        {
            steps[n-2] = climbStairs(n-2,steps);
        }
        steps[n] = steps[n-1] + steps[n-2];
    }

    return steps[n];
}

Merge k sorted linked lists and return it as one sorted list. Analyze and describe its complexity.

References

https://www.cnblogs.com/zywscq/p/5403051.html
https://www.cnblogs.com/skysand/p/4300711.html
#include <stdio.h>
#include <stdlib.h>

typedef struct node {
    int value;
    struct node *next;
}node; 

struct heap {
    int ele;            // node value; heap[0].ele holds the current heap size
    struct node *head;  // list node this value came from; heap[0].head is unused
};

node **generateList(int (*data)[6], int row, int column);
void displayList(node **head, int row);
void heapsort(struct heap *heap);
void makeHeap(struct heap *heap);
void siftDown(struct heap *heap, int i, int len);

int main()
{
    int i,j;
    int data[][6] = {{1,2,21,47},
                     {15,60,65,84},
                     {77,88,93,100},
                     {5,8,17,36},
                     {20,24,63,72}};
    int *merged = (int *)malloc(sizeof(data));
    int row = sizeof(data)/sizeof(data[0]);
    int column = sizeof(data[0])/sizeof(data[0][0]);
    struct heap *h = (struct heap *)malloc(sizeof(struct heap)*(row+1));

    node **head;

    head = generateList(data,row,column);
    displayList(head,row);

    h[0].ele = row;
    h[0].head = NULL;
    for(i = 0; i < row; i++)
    {
        h[i+1].ele = head[i]->value;
        h[i+1].head = head[i];
    }
    heapsort(h);

    i = 0;
    while(1)
    {
        if(h[0].ele == 0) break;

        merged[i++] = h[1].ele;
        if(h[1].head->next != NULL)
        {
            h[1].head = h[1].head->next;
            h[1].ele = h[1].head->value;
        }
        else
        {
            h[1] = h[h[0].ele];
            h[0].ele--;
        }
        heapsort(h);
        
    }

    printf("合并后数组:");
    for(j = 0; j < i; j++)
    {
        printf("%d ",merged[j]);
    }
    printf("\n");

    return 0;
}

/*
 * Build one linked list per row of the 2-D array (0 marks the end of a row).
 * @param data   2-D array of values
 * @param row    number of rows
 * @param column number of columns
 */
node **generateList(int (*data)[6], int row, int column)
{
    int i,j;
    node **head = (node **)malloc(sizeof(node *)*row);
    node **tail = (node **)malloc(sizeof(node *)*row);
    
    for(i = 0; i < row; i++)
    {
        tail[i] = (node *)malloc(sizeof(node));
        head[i] = tail[i];
        for(j = 0; j < column; j++)
        {
            if(data[i][j] != 0)
            {
                tail[i]->value = data[i][j];
                if(data[i][j+1] != 0)
                {
                    tail[i]->next = (node *)malloc(sizeof(node));
                    tail[i] = tail[i]->next;
                }
                else
                {
                    tail[i]->next = NULL;
                }
            }
        }
    }
    
    return head;
}

void displayList(node **head, int row)
{
    int i;
    node *cur;

    for(i = 0; i < row; i++)
    {
        cur = head[i];
        while(cur != NULL)
        {
            printf("%d ",cur->value);
            cur = cur->next;
        }
        printf("\n");
    }
}

void heapsort(struct heap *heap)
{
    int len = heap[0].ele;
    int i;
    struct heap temp;
    
    makeHeap(heap);
    
    i = len;
    while(i > 1)
    {
        temp = heap[1];
        heap[1] = heap[i];
        heap[i] = temp;
        i--;

        siftDown(heap,1,i);
    }
}

void makeHeap(struct heap *heap)
{
    int len = heap[0].ele;
    int i;

    for(i = len/2; i >= 1; i--)
    {
        siftDown(heap, i, len);
    }
}

/*
 * Sift the element at index i down so that every parent is no smaller
 * than its children (max-heap property).
 * @param heap heap array (heap[0].ele stores the current heap size)
 * @param i    node index
 * @param len  heap length
 */
void siftDown(struct heap *heap, int i, int len)
{
    struct heap temp;

    if(i < 1 || 2*i > len) return;

    temp = heap[i];
    while(2*i <= len)
    {
        i *= 2;
        if((i < len) && (heap[i].ele < heap[i+1].ele))
        {
            i += 1; // pick the larger of the two children
        }

        if(temp.ele >= heap[i].ele)
        {
            i /= 2;
            break;
        }

        heap[i/2] = heap[i];
    }
    heap[i] = temp;
}