博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
cJSON源码分析3-核心解析算法
阅读量:2381 次
发布时间:2019-05-10

本文共 22377 字,大约阅读时间需要 74 分钟。

解析函数

对于字符串,调用cJSON_Parse解析为一个cJSON对象

/* Parses the JSON text in `value` into a cJSON item by forwarding to cJSON_ParseWithOpts(value, 0, 0); yields NULL when parsing fails. */
CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value);

parse_buffer

为了保存待解析的字符串以及解析过程中的状态(当前位置、嵌套深度等),使用 parse_buffer 结构体。

//解析的缓存typedef struct{
const unsigned char *content; //字符串内容 size_t length; //长度 size_t offset; //当前位置 size_t depth; //当前位置对象或数组的深度 internal_hooks hooks; //使用的hook} parse_buffer;

parse_buffer常用函数的宏定义:

/* True when `size` bytes can be read starting at the current offset:
 * the buffer is non-NULL and offset + size does not exceed length. */
#define can_read(buffer, size) (((buffer) != NULL) && (((buffer)->offset + (size)) <= (buffer)->length))
/* True when the byte at offset + index lies inside the buffer. */
#define can_access_at_index(buffer, index) (((buffer) != NULL) && (((buffer)->offset + (index)) < (buffer)->length))
/* Negation of can_access_at_index. */
#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index))
/* Pointer to the byte at the current offset. */
#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset)

具体解析过程

//调用cJSON_ParseWithOptsCJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value){
return cJSON_ParseWithOpts(value, 0, 0);}CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated){
//1.1 初始化解析的buffer parse_buffer buffer = {
0, 0, 0, 0, {
0, 0, 0 } }; //item是返回的cJSON对象 cJSON *item = NULL; /* reset error position */ //1.2 初始化全局错误 global_error.json = NULL; global_error.position = 0; if (value == NULL) {
goto fail; } //2. 填充buffer buffer.content = (const unsigned char*)value; buffer.length = strlen((const char*)value) + sizeof(""); buffer.offset = 0; buffer.hooks = global_hooks; //3. 创建一个节点 item = cJSON_New_Item(&global_hooks); if (item == NULL) /* memory fail */ {
goto fail; } //4. 先去掉utf8的BOM,再去掉前导的空白,然后解析 if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer)))) {
/* parse failure. ep is set. */ goto fail; } /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ //字符串已经解析完 //5. 如果要求必须以'\0'结尾,检查最后一个字符 if (require_null_terminated) {
buffer_skip_whitespace(&buffer); if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0') {
goto fail; } } //6. 返回解析后的buffer if (return_parse_end) {
*return_parse_end = (const char*)buffer_at_offset(&buffer); } //7. 返回解析后的cJSON return item;fail: //出错处理 if (item != NULL) {
cJSON_Delete(item); } if (value != NULL) {
error local_error; local_error.json = (const unsigned char*)value; local_error.position = 0; if (buffer.offset < buffer.length) {
local_error.position = buffer.offset; } else if (buffer.length > 0) {
local_error.position = buffer.length - 1; } if (return_parse_end != NULL) {
*return_parse_end = (const char*)local_error.json + local_error.position; } global_error = local_error; } return NULL;}//解析时跳过前导的空格static parse_buffer *buffer_skip_whitespace(parse_buffer * const buffer){
if ((buffer == NULL) || (buffer->content == NULL)) {
return NULL; } //这里ascii码<=32都认为是空格 while (can_access_at_index(buffer, 0) && (buffer_at_offset(buffer)[0] <= 32)) {
buffer->offset++; } if (buffer->offset == buffer->length) {
buffer->offset--; } return buffer;}//去掉UTF-8 BOM "\xEF\xBB\xBF"static parse_buffer *skip_utf8_bom(parse_buffer * const buffer){
if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0)) {
return NULL; } //去掉前导的BOM if (can_access_at_index(buffer, 4) && (strncmp((const char*)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0)) {
buffer->offset += 3; } return buffer;}

parse_value

在 cJSON_ParseWithOpts 中准备好 parse_buffer 之后,真正的解析工作由 parse_value 函数完成。

/* Prototypes for the type-specific parsers, which are defined further down. */
static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer);
static cJSON_bool parse_number(cJSON * const item, parse_buffer * const input_buffer);
static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer);
static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer);

/* Core of the parser: read one JSON value from the buffer into `item`.
 *
 * item:         item that receives the parsed value
 * input_buffer: input text plus the current read position
 *
 * The value's kind is chosen from the bytes at the current offset. Literals
 * are handled here; strings, numbers, arrays and objects are delegated.
 * Returns true on success, false when there is no input or no kind matches. */
static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer)
{
    if ((input_buffer == NULL) || (input_buffer->content == NULL))
    {
        return false; /* no input */
    }

    /* 1.1 null: the next 4 bytes are "null" */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "null", 4) == 0))
    {
        item->type = cJSON_NULL;
        input_buffer->offset += 4;
        return true;
    }
    /* 1.2 false: the next 5 bytes are "false" */
    if (can_read(input_buffer, 5) && (strncmp((const char*)buffer_at_offset(input_buffer), "false", 5) == 0))
    {
        item->type = cJSON_False;
        input_buffer->offset += 5;
        return true;
    }
    /* 1.3 true: the next 4 bytes are "true" */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "true", 4) == 0))
    {
        item->type = cJSON_True;
        item->valueint = 1;
        input_buffer->offset += 4;
        return true;
    }
    /* 1.4 string: starts with '"' */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '\"'))
    {
        return parse_string(item, input_buffer);
    }
    /* 1.5 number: starts with '-' or a digit 0-9 */
    if (can_access_at_index(input_buffer, 0) && ((buffer_at_offset(input_buffer)[0] == '-') || ((buffer_at_offset(input_buffer)[0] >= '0') && (buffer_at_offset(input_buffer)[0] <= '9'))))
    {
        return parse_number(item, input_buffer);
    }
    /* 1.6 array: starts with '[' */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '['))
    {
        return parse_array(item, input_buffer);
    }
    /* 1.7 object: starts with '{' */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '{'))
    {
        return parse_object(item, input_buffer);
    }

    return false;
}

字符串解析

调用parse_string解析字符串。

//解析字符串static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer){
const unsigned char *input_pointer = buffer_at_offset(input_buffer) + 1; //跳过第一个" const unsigned char *input_end = buffer_at_offset(input_buffer) + 1;//跳过第一个" unsigned char *output_pointer = NULL; unsigned char *output = NULL; /* not a string */ //1. 不是以"开头,不是字符串,直接返回 if (buffer_at_offset(input_buffer)[0] != '\"') {
goto fail; } {
/* calculate approximate size of the output (overestimate) */ //2. 计算存放这些字符串需要的空间 size_t allocation_length = 0; size_t skipped_bytes = 0; //2.1 一直到字符串结尾",此后input_end指向最后一个字符 while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"')) {
/* is escape sequence */ if (input_end[0] == '\\') //为反斜杠,说明遇到了转义字符 {
//出错:以\\结尾 if ((size_t)(input_end + 1 - input_buffer->content) >= input_buffer->length) {
/* prevent buffer overflow when last input character is a backslash */ goto fail; } skipped_bytes++; //跳过字符+1 input_end++; //字符串指针向前+1 } input_end++; //继续判断下一个字符 } //2.2 再次判断字符串是不是正确以"结尾 if (((size_t)(input_end - input_buffer->content) >= input_buffer->length) || (*input_end != '\"')) {
goto fail; /* string ended unexpectedly */ } /* This is at most how much we need for the output */ //2.3 需要的最大的存储字符串长度 allocation_length = (size_t) (input_end - buffer_at_offset(input_buffer)) - skipped_bytes; //2.4 output存放解析后的字符串 output = (unsigned char*)input_buffer->hooks.allocate(allocation_length + sizeof("")); if (output == NULL) {
goto fail; /* allocation failure */ } } //3. 输出的字符串指针指向解析字符串存放的位置 output_pointer = output; /* loop through the string literal */ //4. 解析字符串 while (input_pointer < input_end) {
//4.1 不是转义字符,直接判断下一个 if (*input_pointer != '\\') {
*output_pointer++ = *input_pointer++; } /* escape sequence */ else //4.2 处理转义字符 {
unsigned char sequence_length = 2; //4.2.1 当前处理的序列长度 // "/t", input_end指向最后的",input_pointer指向/ if ((input_end - input_pointer) < 1) {
goto fail; } //4.2.2 根据下一个字符判断 switch (input_pointer[1]) {
//转义字符 case 'b': *output_pointer++ = '\b'; break; case 'f': *output_pointer++ = '\f'; break; case 'n': *output_pointer++ = '\n'; break; case 'r': *output_pointer++ = '\r'; break; case 't': *output_pointer++ = '\t'; break; case '\"': case '\\': case '/': *output_pointer++ = input_pointer[1]; break; /* UTF-16 literal */ //UTF-16的处理 case 'u': sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer); if (sequence_length == 0) {
/* failed to convert UTF16-literal to UTF-8 */ goto fail; } break; default: goto fail; } //input跳过的字节数 input_pointer += sequence_length; } } /* zero terminate the output */ //5. 分析完所有的字符串,天界结束符 *output_pointer = '\0'; //6. 填充item结构体 item->type = cJSON_String; item->valuestring = (char*)output; input_buffer->offset = (size_t) (input_end - input_buffer->content); input_buffer->offset++; return true;fail: //失败的处理 if (output != NULL) {
input_buffer->hooks.deallocate(output); } if (input_pointer != NULL) {
input_buffer->offset = (size_t)(input_pointer - input_buffer->content); } return false;}

UTF-16转UTF-8

UTF-8 的编码单元是 8 位的字节,UTF-16 的编码单元是 16 位。JSON 字符串中的 \uXXXX 以 4 位十六进制数表示码点 U+0000 至 U+FFFF。如果第一个 \uXXXX 的值落在 U+D800 至 U+DBFF 之间,说明它是代理对(surrogate pair)中的高代理项(high surrogate),其后必须紧跟一个 U+DC00 至 U+DFFF 的低代理项(low surrogate)。然后,我们用下列公式把代理对 (H, L) 变换成真实的码点:

codepoint = 0x10000 + (H − 0xD800) × 0x400 + (L − 0xDC00)

码点范围 码点位数 字节1 字节2 字节3 字节4
U+0000~U+007F 7 0xxxxxxx
U+0080~U+07FF 11 110xxxxx 10xxxxxx
U+0800~U+FFFF 16 1110xxxx 10xxxxxx 10xxxxxx
U+10000~U+10FFFF 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
/* Parse 4 hexadecimal digits at `input` into their integer value.
 * Returns 0 when any of the four characters is not a hex digit, which the
 * caller cannot tell apart from the valid input "0000". */
static unsigned parse_hex4(const unsigned char * const input)
{
    unsigned int h = 0;
    size_t i = 0;

    for (i = 0; i < 4; i++)
    {
        /* parse digit */
        if ((input[i] >= '0') && (input[i] <= '9'))
        {
            h += (unsigned int) input[i] - '0';
        }
        else if ((input[i] >= 'A') && (input[i] <= 'F'))
        {
            h += (unsigned int) 10 + input[i] - 'A';
        }
        else if ((input[i] >= 'a') && (input[i] <= 'f'))
        {
            h += (unsigned int) 10 + input[i] - 'a';
        }
        else /* invalid */
        {
            return 0;
        }

        if (i < 3)
        {
            /* shift left to make place for the next nibble */
            h = h << 4;
        }
    }

    return h;
}

/* Convert a UTF-16 literal to UTF-8.
 * A literal can be one or two sequences of the form \uXXXX.
 *
 * input_pointer:  points at the backslash of the first \uXXXX
 * input_end:      end of the readable input
 * output_pointer: write cursor; advanced by the number of UTF-8 bytes written
 *
 * Returns the number of input bytes consumed (6 or 12), or 0 on failure. */
static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer)
{
    long unsigned int codepoint = 0;
    unsigned int first_code = 0;
    const unsigned char *first_sequence = input_pointer;
    unsigned char utf8_length = 0;
    unsigned char utf8_position = 0;
    unsigned char sequence_length = 0;
    unsigned char first_byte_mark = 0;

    /* a single \uXXXX needs at least 6 input bytes */
    if ((input_end - first_sequence) < 6)
    {
        /* input ends unexpectedly */
        goto fail;
    }

    /* get the first utf16 sequence */
    first_code = parse_hex4(first_sequence + 2);

    /* check that the code is valid: 0xDC00..0xDFFF is a low surrogate and
     * must not appear as the first code unit */
    if (((first_code >= 0xDC00) && (first_code <= 0xDFFF)))
    {
        goto fail;
    }

    /* UTF16 surrogate pair: a high surrogate (0xD800..0xDBFF) must be
     * followed by a low surrogate */
    if ((first_code >= 0xD800) && (first_code <= 0xDBFF))
    {
        const unsigned char *second_sequence = first_sequence + 6;
        unsigned int second_code = 0;
        sequence_length = 12; /* \uXXXX\uXXXX */

        if ((input_end - second_sequence) < 6)
        {
            /* input ends unexpectedly */
            goto fail;
        }

        if ((second_sequence[0] != '\\') || (second_sequence[1] != 'u'))
        {
            /* missing second half of the surrogate pair */
            goto fail;
        }

        /* get the second utf16 sequence */
        second_code = parse_hex4(second_sequence + 2);
        /* check that the code is valid */
        if ((second_code < 0xDC00) || (second_code > 0xDFFF))
        {
            /* invalid second half of the surrogate pair */
            goto fail;
        }

        /* calculate the unicode codepoint from the surrogate pair:
         * codepoint = 0x10000 + (H - 0xD800) * 0x400 + (L - 0xDC00) */
        codepoint = 0x10000 + (((first_code & 0x3FF) << 10) | (second_code & 0x3FF));
    }
    else
    {
        sequence_length = 6; /* \uXXXX */
        codepoint = first_code;
    }

    /* encode as UTF-8
     * takes at maximum 4 bytes to encode:
     * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if (codepoint < 0x80) /* U+0000..U+007F */
    {
        /* normal ascii, encoding 0xxxxxxx */
        utf8_length = 1;
    }
    else if (codepoint < 0x800) /* U+0080..U+07FF */
    {
        /* two bytes, encoding 110xxxxx 10xxxxxx */
        utf8_length = 2;
        first_byte_mark = 0xC0; /* 11000000 */
    }
    else if (codepoint < 0x10000) /* U+0800..U+FFFF */
    {
        /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */
        utf8_length = 3;
        first_byte_mark = 0xE0; /* 11100000 */
    }
    else if (codepoint <= 0x10FFFF) /* U+10000..U+10FFFF */
    {
        /* four bytes, encoding 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        utf8_length = 4;
        first_byte_mark = 0xF0; /* 11110000 */
    }
    else
    {
        /* invalid unicode codepoint */
        goto fail;
    }

    /* encode as utf8: fill the continuation bytes from last to first,
     * each one carrying the low 6 bits of what is left of the codepoint */
    for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--)
    {
        /* 10xxxxxx */
        (*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF);
        codepoint >>= 6;
    }
    /* encode first byte */
    if (utf8_length > 1)
    {
        (*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF);
    }
    else
    {
        (*output_pointer)[0] = (unsigned char)(codepoint & 0x7F);
    }

    *output_pointer += utf8_length;

    return sequence_length;

fail:
    return 0;
}

解析数字

解析数字时,把数字的字符串复制出来,调用strtod函数

//解析数字static cJSON_bool parse_number(cJSON * const item, parse_buffer * const input_buffer){
double number = 0; unsigned char *after_end = NULL; unsigned char number_c_string[64]; //存放数字的字符串 unsigned char decimal_point = get_decimal_point(); size_t i = 0; if ((input_buffer == NULL) || (input_buffer->content == NULL)) {
return false; } for (i = 0; (i < (sizeof(number_c_string) - 1)) && can_access_at_index(input_buffer, i); i++) {
switch (buffer_at_offset(input_buffer)[i]) {
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '+': case '-': case 'e': case 'E': number_c_string[i] = buffer_at_offset(input_buffer)[i]; break; case '.': number_c_string[i] = decimal_point; break; default: goto loop_end; } }loop_end: number_c_string[i] = '\0'; //使用strtod解析数字,after_end返回解析完成后的下一个的字符的位置 11.22abc 解析后after_end指向a number = strtod((const char*)number_c_string, (char**)&after_end); if (number_c_string == after_end) //解析出错 {
return false; /* parse_error */ } item->valuedouble = number; /* use saturation in case of overflow */ if (number >= INT_MAX) {
item->valueint = INT_MAX; } else if (number <= (double)INT_MIN) {
item->valueint = INT_MIN; } else {
item->valueint = (int)number; //转为整数 } item->type = cJSON_Number; input_buffer->offset += (size_t)(after_end - number_c_string); return true;}

解析数组

解析数组比较简单:遇到 [ 表示数组开始,之后每个以逗号 , 分隔的值就是一个 item(由 parse_value 解析),遇到 ] 表示数组解析完毕。

/* Parse the array at the current offset into `item`.
 *
 * Elements are parsed with parse_value and linked through next/prev. On
 * success item->type becomes cJSON_Array, item->child is the first element
 * (NULL for an empty array) and the offset is moved past the ']'.
 * On failure the elements built so far are deleted and false is returned. */
static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer)
{
    cJSON *head = NULL; /* head of the linked list */
    cJSON *current_item = NULL;

    if (input_buffer->depth >= CJSON_NESTING_LIMIT)
    {
        return false; /* to deeply nested */
    }
    input_buffer->depth++; /* 1. one level deeper */

    /* bounds-checked the same way parse_object checks for '{' */
    if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '['))
    {
        /* not an array */
        goto fail;
    }

    input_buffer->offset++;
    buffer_skip_whitespace(input_buffer); /* 2. skip whitespace after '[' */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ']'))
    {
        /* 3.1 empty array */
        goto success;
    }

    /* check if we skipped to the end of the buffer */
    if (cannot_access_at_index(input_buffer, 0))
    {
        input_buffer->offset--;
        goto fail;
    }

    /* step back to character in front of the first element */
    input_buffer->offset--;
    /* 3.2 loop through the comma separated array elements */
    do
    {
        /* 3.2.1 allocate next item */
        cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks));
        if (new_item == NULL)
        {
            goto fail; /* allocation failure */
        }

        /* 3.2.2 attach next item to list */
        if (head == NULL)
        {
            /* start the linked list */
            current_item = head = new_item;
        }
        else
        {
            /* add to the end and advance */
            current_item->next = new_item;
            new_item->prev = current_item;
            current_item = new_item;
        }

        /* 3.2.3 parse next value; the ++ steps over the '[' or ',' in front of it */
        input_buffer->offset++;
        buffer_skip_whitespace(input_buffer);
        if (!parse_value(current_item, input_buffer))
        {
            goto fail; /* failed to parse value */
        }
        buffer_skip_whitespace(input_buffer); /* 3.2.4 skip whitespace after the value */
    }
    while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ','));

    /* 4. the element list must be closed by ']' */
    if (cannot_access_at_index(input_buffer, 0) || buffer_at_offset(input_buffer)[0] != ']')
    {
        goto fail; /* expected end of array */
    }

success:
    /* 5.1 fill in the item and step over the ']' */
    input_buffer->depth--;

    item->type = cJSON_Array;
    item->child = head;

    input_buffer->offset++;

    return true;

fail:
    /* 5.2 release whatever was built */
    if (head != NULL)
    {
        cJSON_Delete(head);
    }

    return false;
}

解析对象

解析对象和解析数组类似。

/* Parse the object at the current offset into `item`.
 *
 * Each member is a name parsed with parse_string, a ':' and a value parsed
 * with parse_value; members are linked through next/prev. On success
 * item->type becomes cJSON_Object, item->child is the first member (NULL for
 * an empty object) and the offset is moved past the '}'.
 * On failure the members built so far are deleted and false is returned. */
static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer)
{
    cJSON *head = NULL; /* linked list head */
    cJSON *current_item = NULL;

    if (input_buffer->depth >= CJSON_NESTING_LIMIT)
    {
        return false; /* to deeply nested */
    }
    input_buffer->depth++;

    if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '{'))
    {
        goto fail; /* not an object */
    }

    input_buffer->offset++;
    buffer_skip_whitespace(input_buffer);
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '}'))
    {
        goto success; /* empty object */
    }

    /* check if we skipped to the end of the buffer */
    if (cannot_access_at_index(input_buffer, 0))
    {
        input_buffer->offset--;
        goto fail;
    }

    /* step back to character in front of the first element */
    input_buffer->offset--;
    /* loop through the comma separated object members */
    do
    {
        /* allocate next item */
        cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks));
        if (new_item == NULL)
        {
            goto fail; /* allocation failure */
        }

        /* attach next item to list */
        if (head == NULL)
        {
            /* start the linked list */
            current_item = head = new_item;
        }
        else
        {
            /* add to the end and advance */
            current_item->next = new_item;
            new_item->prev = current_item;
            current_item = new_item;
        }

        /* parse the name of the child; the ++ steps over the '{' or ',' in front of it */
        input_buffer->offset++;
        buffer_skip_whitespace(input_buffer);
        if (!parse_string(current_item, input_buffer))
        {
            goto fail; /* failed to parse name */
        }
        buffer_skip_whitespace(input_buffer);

        /* swap valuestring and string, because we parsed the name */
        current_item->string = current_item->valuestring;
        current_item->valuestring = NULL;

        if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != ':'))
        {
            goto fail; /* invalid object */
        }

        /* parse the value */
        input_buffer->offset++;
        buffer_skip_whitespace(input_buffer);
        if (!parse_value(current_item, input_buffer))
        {
            goto fail; /* failed to parse value */
        }
        buffer_skip_whitespace(input_buffer);
    }
    while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ','));

    if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '}'))
    {
        goto fail; /* expected end of object */
    }

success:
    input_buffer->depth--;

    item->type = cJSON_Object;
    item->child = head;

    input_buffer->offset++;
    return true;

fail:
    if (head != NULL)
    {
        cJSON_Delete(head);
    }

    return false;
}

转载地址:http://utmxb.baihongyu.com/

你可能感兴趣的文章
Android Scroller简单用法
查看>>
ffmpeg系列:使用ffmpeg转换为RGB数据并缩放视频
查看>>
Vmware虚拟机设置固定IP地址
查看>>
能Ping通外网但就是不能打开所有网页的解决办法
查看>>
第六章 6.2.4节练习
查看>>
第六章 6.2.5节练习 & 6.2.6节练习
查看>>
第六章 6.3.2节练习
查看>>
第六章 6.4节练习 & 6.5.1节练习
查看>>
第六章 6.5.2节练习
查看>>
Linux 体系结构、远程登录、文件传输
查看>>
Linux 安装软件、配置静态IP
查看>>
JavaScript 函数、闭包
查看>>
JavaScript BOM
查看>>
JavaScript Date、Math对象、杂项
查看>>
Bootstrap的使用
查看>>
Java IO
查看>>
Java JDBC
查看>>
JVM 类加载机制、对象的创建过程
查看>>
SpringBoot整合AOP
查看>>
消息中间件 MQ
查看>>