本文共 22377 字,大约阅读时间需要 74 分钟。
对于字符串,调用cJSON_Parse解析为一个cJSON对象
CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value);
为了保存待解析的字符串及解析进度,使用parse_buffer结构体。
//解析的缓存typedef struct{ const unsigned char *content; //字符串内容 size_t length; //长度 size_t offset; //当前位置 size_t depth; //当前位置对象或数组的深度 internal_hooks hooks; //使用的hook} parse_buffer;
parse_buffer常用函数的宏定义:
/*
 * Bounds helpers for parse_buffer.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. `cond ? a : b`, `i + 1`) expand with the intended precedence.
 * Note that `buffer` is still evaluated more than once; do not pass an
 * expression with side effects.
 */

/* True when buffer is non-NULL and `size` bytes starting at the current
 * offset fit inside the buffer (offset + size <= length). */
#define can_read(buffer, size) (((buffer) != NULL) && (((buffer)->offset + (size)) <= (buffer)->length))

/* True when buffer is non-NULL and the byte `index` positions past the
 * current offset lies inside the buffer (offset + index < length). */
#define can_access_at_index(buffer, index) (((buffer) != NULL) && (((buffer)->offset + (index)) < (buffer)->length))

/* Negation of can_access_at_index. */
#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index))

/* Pointer to the byte at the current offset. Performs no NULL or bounds check. */
#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset)
//调用cJSON_ParseWithOptsCJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value){ return cJSON_ParseWithOpts(value, 0, 0);}CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated){ //1.1 初始化解析的buffer parse_buffer buffer = { 0, 0, 0, 0, { 0, 0, 0 } }; //item是返回的cJSON对象 cJSON *item = NULL; /* reset error position */ //1.2 初始化全局错误 global_error.json = NULL; global_error.position = 0; if (value == NULL) { goto fail; } //2. 填充buffer buffer.content = (const unsigned char*)value; buffer.length = strlen((const char*)value) + sizeof(""); buffer.offset = 0; buffer.hooks = global_hooks; //3. 创建一个节点 item = cJSON_New_Item(&global_hooks); if (item == NULL) /* memory fail */ { goto fail; } //4. 先去掉utf8的BOM,再去掉前导的空白,然后解析 if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer)))) { /* parse failure. ep is set. */ goto fail; } /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ //字符串已经解析完 //5. 如果要求必须以'\0'结尾,检查最后一个字符 if (require_null_terminated) { buffer_skip_whitespace(&buffer); if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0') { goto fail; } } //6. 返回解析后的buffer if (return_parse_end) { *return_parse_end = (const char*)buffer_at_offset(&buffer); } //7. 
返回解析后的cJSON return item;fail: //出错处理 if (item != NULL) { cJSON_Delete(item); } if (value != NULL) { error local_error; local_error.json = (const unsigned char*)value; local_error.position = 0; if (buffer.offset < buffer.length) { local_error.position = buffer.offset; } else if (buffer.length > 0) { local_error.position = buffer.length - 1; } if (return_parse_end != NULL) { *return_parse_end = (const char*)local_error.json + local_error.position; } global_error = local_error; } return NULL;}//解析时跳过前导的空格static parse_buffer *buffer_skip_whitespace(parse_buffer * const buffer){ if ((buffer == NULL) || (buffer->content == NULL)) { return NULL; } //这里ascii码<=32都认为是空格 while (can_access_at_index(buffer, 0) && (buffer_at_offset(buffer)[0] <= 32)) { buffer->offset++; } if (buffer->offset == buffer->length) { buffer->offset--; } return buffer;}//去掉UTF-8 BOM "\xEF\xBB\xBF"static parse_buffer *skip_utf8_bom(parse_buffer * const buffer){ if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0)) { return NULL; } //去掉前导的BOM if (can_access_at_index(buffer, 4) && (strncmp((const char*)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0)) { buffer->offset += 3; } return buffer;}
在对parse_buffer做完上述预处理(跳过BOM和前导空白)之后,主要的解析工作在parse_value函数中完成。
/* True when the buffer holds at least `length` more bytes and they equal `literal`. */
static cJSON_bool buffer_has_literal(const parse_buffer * const buffer, const char * const literal, const size_t length)
{
    return can_read(buffer, length)
        && (strncmp((const char*)buffer_at_offset(buffer), literal, length) == 0);
}

/*
 * Core dispatcher: parse one JSON value at the buffer's current offset.
 *
 * item:          receives the parsed value.
 * input_buffer:  the text being parsed; its offset is advanced past the value.
 *
 * The literals null/false/true are handled here; strings, numbers, arrays and
 * objects are recognised by their first byte and handed to the matching
 * parse_* function. Returns false when nothing matches or there is no input.
 */
static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer)
{
    if ((input_buffer == NULL) || (input_buffer->content == NULL))
    {
        return false; /* no input */
    }

    /* the three literals */
    if (buffer_has_literal(input_buffer, "null", 4))
    {
        item->type = cJSON_NULL;
        input_buffer->offset += 4;
        return true;
    }
    if (buffer_has_literal(input_buffer, "false", 5))
    {
        item->type = cJSON_False;
        input_buffer->offset += 5;
        return true;
    }
    if (buffer_has_literal(input_buffer, "true", 4))
    {
        item->type = cJSON_True;
        item->valueint = 1;
        input_buffer->offset += 4;
        return true;
    }

    /* everything else is selected by its first byte */
    if (cannot_access_at_index(input_buffer, 0))
    {
        return false;
    }

    switch (buffer_at_offset(input_buffer)[0])
    {
        case '\"':
            return parse_string(item, input_buffer);

        case '-':
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return parse_number(item, input_buffer);

        case '[':
            return parse_array(item, input_buffer);

        case '{':
            return parse_object(item, input_buffer);

        default:
            return false;
    }
}
调用parse_string解析字符串。
//解析字符串static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer){ const unsigned char *input_pointer = buffer_at_offset(input_buffer) + 1; //跳过第一个" const unsigned char *input_end = buffer_at_offset(input_buffer) + 1;//跳过第一个" unsigned char *output_pointer = NULL; unsigned char *output = NULL; /* not a string */ //1. 不是以"开头,不是字符串,直接返回 if (buffer_at_offset(input_buffer)[0] != '\"') { goto fail; } { /* calculate approximate size of the output (overestimate) */ //2. 计算存放这些字符串需要的空间 size_t allocation_length = 0; size_t skipped_bytes = 0; //2.1 一直到字符串结尾",此后input_end指向最后一个字符 while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"')) { /* is escape sequence */ if (input_end[0] == '\\') //为反斜杠,说明遇到了转义字符 { //出错:以\\结尾 if ((size_t)(input_end + 1 - input_buffer->content) >= input_buffer->length) { /* prevent buffer overflow when last input character is a backslash */ goto fail; } skipped_bytes++; //跳过字符+1 input_end++; //字符串指针向前+1 } input_end++; //继续判断下一个字符 } //2.2 再次判断字符串是不是正确以"结尾 if (((size_t)(input_end - input_buffer->content) >= input_buffer->length) || (*input_end != '\"')) { goto fail; /* string ended unexpectedly */ } /* This is at most how much we need for the output */ //2.3 需要的最大的存储字符串长度 allocation_length = (size_t) (input_end - buffer_at_offset(input_buffer)) - skipped_bytes; //2.4 output存放解析后的字符串 output = (unsigned char*)input_buffer->hooks.allocate(allocation_length + sizeof("")); if (output == NULL) { goto fail; /* allocation failure */ } } //3. 输出的字符串指针指向解析字符串存放的位置 output_pointer = output; /* loop through the string literal */ //4. 
解析字符串 while (input_pointer < input_end) { //4.1 不是转义字符,直接判断下一个 if (*input_pointer != '\\') { *output_pointer++ = *input_pointer++; } /* escape sequence */ else //4.2 处理转义字符 { unsigned char sequence_length = 2; //4.2.1 当前处理的序列长度 // "/t", input_end指向最后的",input_pointer指向/ if ((input_end - input_pointer) < 1) { goto fail; } //4.2.2 根据下一个字符判断 switch (input_pointer[1]) { //转义字符 case 'b': *output_pointer++ = '\b'; break; case 'f': *output_pointer++ = '\f'; break; case 'n': *output_pointer++ = '\n'; break; case 'r': *output_pointer++ = '\r'; break; case 't': *output_pointer++ = '\t'; break; case '\"': case '\\': case '/': *output_pointer++ = input_pointer[1]; break; /* UTF-16 literal */ //UTF-16的处理 case 'u': sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer); if (sequence_length == 0) { /* failed to convert UTF16-literal to UTF-8 */ goto fail; } break; default: goto fail; } //input跳过的字节数 input_pointer += sequence_length; } } /* zero terminate the output */ //5. 分析完所有的字符串,天界结束符 *output_pointer = '\0'; //6. 填充item结构体 item->type = cJSON_String; item->valuestring = (char*)output; input_buffer->offset = (size_t) (input_end - input_buffer->content); input_buffer->offset++; return true;fail: //失败的处理 if (output != NULL) { input_buffer->hooks.deallocate(output); } if (input_pointer != NULL) { input_buffer->offset = (size_t)(input_pointer - input_buffer->content); } return false;}
UTF-8 的编码单元是 8 位的字节,UTF-16 的编码单元为 16 位。JSON 字符串中的 \uXXXX 以 16 进制表示码点 U+0000 至 U+FFFF。如果第一个码点在 U+D800 至 U+DBFF 之间,我们便知道它是代理对(surrogate pair)的高代理项(high surrogate),之后应该伴随一个 U+DC00 至 U+DFFF 的低代理项(low surrogate)。然后,我们用下列公式把代理对 (H, L) 变换成真实的码点:
codepoint = 0x10000 + (H − 0xD800) × 0x400 + (L − 0xDC00)
码点范围 | 码点位数 | 字节1 | 字节2 | 字节3 | 字节4 |
---|---|---|---|---|---|
U+0000~U+007F | 7 | 0xxxxxxx | |||
U+0080~U+07FF | 11 | 110xxxxx | 10xxxxxx | ||
U+0800~U+FFFF | 16 | 1110xxxx | 10xxxxxx | 10xxxxxx | |
U+10000~U+10FFFF | 21 | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx |
/* Returns 1 when the four bytes at input are all hexadecimal digits, else 0. */
static int is_hex4(const unsigned char * const input)
{
    size_t i = 0;

    for (i = 0; i < 4; i++)
    {
        const unsigned char c = input[i];
        const int is_hex = ((c >= '0') && (c <= '9'))
            || ((c >= 'A') && (c <= 'F'))
            || ((c >= 'a') && (c <= 'f'));

        if (!is_hex)
        {
            return 0;
        }
    }

    return 1;
}

/*
 * Parse a 4 digit hexadecimal number into its numeric value.
 *
 * Returns 0 when any of the four bytes is not a hexadecimal digit, which is
 * indistinguishable from a valid "0000"; callers that must tell the two apart
 * have to validate the digits first.
 */
static unsigned parse_hex4(const unsigned char * const input)
{
    unsigned int h = 0;
    size_t i = 0;

    for (i = 0; i < 4; i++)
    {
        /* parse digit */
        if ((input[i] >= '0') && (input[i] <= '9'))
        {
            h += (unsigned int) input[i] - '0';
        }
        else if ((input[i] >= 'A') && (input[i] <= 'F'))
        {
            h += (unsigned int) 10 + input[i] - 'A';
        }
        else if ((input[i] >= 'a') && (input[i] <= 'f'))
        {
            h += (unsigned int) 10 + input[i] - 'a';
        }
        else /* invalid */
        {
            return 0;
        }

        if (i < 3)
        {
            /* shift left to make place for the next nibble */
            h = h << 4;
        }
    }

    return h;
}

/*
 * Convert a UTF-16 literal to UTF-8.
 * A literal can be one or two sequences of the form \uXXXX.
 *
 * input_pointer:  points at the backslash of the first \uXXXX sequence.
 * input_end:      end of the readable input; at least 6 bytes (12 for a
 *                 surrogate pair) must lie before it.
 * output_pointer: the UTF-8 bytes are written at *output_pointer, which is
 *                 then advanced by the number of bytes written (1 to 4).
 *
 * Returns the number of input bytes consumed (6 or 12), or 0 on failure:
 * truncated input, non-hexadecimal digits, a lone low surrogate, or a high
 * surrogate that is not followed by a valid low surrogate.
 */
static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer)
{
    long unsigned int codepoint = 0;
    unsigned int first_code = 0;
    const unsigned char *first_sequence = input_pointer;
    unsigned char utf8_length = 0;
    unsigned char utf8_position = 0;
    unsigned char sequence_length = 0;
    unsigned char first_byte_mark = 0;

    /* a sequence needs at least the 6 bytes of \uXXXX */
    if ((input_end - first_sequence) < 6)
    {
        /* input ends unexpectedly */
        goto fail;
    }

    /* parse_hex4 reports bad digits as 0, which would otherwise be taken for
     * a valid U+0000, so reject malformed digits up front */
    if (!is_hex4(first_sequence + 2))
    {
        goto fail;
    }

    /* get the first utf16 sequence */
    first_code = parse_hex4(first_sequence + 2);

    /* 0xDC00..0xDFFF is a low surrogate and is not valid as the first code */
    if (((first_code >= 0xDC00) && (first_code <= 0xDFFF)))
    {
        goto fail;
    }

    /* UTF16 surrogate pair: a high surrogate must be followed by a low one */
    if ((first_code >= 0xD800) && (first_code <= 0xDBFF))
    {
        const unsigned char *second_sequence = first_sequence + 6;
        unsigned int second_code = 0;
        sequence_length = 12; /* \uXXXX\uXXXX */

        if ((input_end - second_sequence) < 6)
        {
            /* input ends unexpectedly */
            goto fail;
        }

        if ((second_sequence[0] != '\\') || (second_sequence[1] != 'u'))
        {
            /* missing second half of the surrogate pair */
            goto fail;
        }

        if (!is_hex4(second_sequence + 2))
        {
            goto fail;
        }

        /* get the second utf16 sequence */
        second_code = parse_hex4(second_sequence + 2);
        /* check that the code is valid */
        if ((second_code < 0xDC00) || (second_code > 0xDFFF))
        {
            /* invalid second half of the surrogate pair */
            goto fail;
        }

        /* codepoint = 0x10000 + (H - 0xD800) * 0x400 + (L - 0xDC00) */
        codepoint = 0x10000 + (((first_code & 0x3FF) << 10) | (second_code & 0x3FF));
    }
    else
    {
        sequence_length = 6; /* \uXXXX */
        codepoint = first_code;
    }

    /* encode as UTF-8
     * takes at maximum 4 bytes to encode:
     * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if (codepoint < 0x80) /* U+0000..U+007F */
    {
        /* normal ascii, encoding 0xxxxxxx */
        utf8_length = 1;
    }
    else if (codepoint < 0x800) /* U+0080..U+07FF */
    {
        /* two bytes, encoding 110xxxxx 10xxxxxx */
        utf8_length = 2;
        first_byte_mark = 0xC0; /* 11000000 */
    }
    else if (codepoint < 0x10000) /* U+0800..U+FFFF */
    {
        /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */
        utf8_length = 3;
        first_byte_mark = 0xE0; /* 11100000 */
    }
    else if (codepoint <= 0x10FFFF) /* U+10000..U+10FFFF */
    {
        /* four bytes, encoding 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        utf8_length = 4;
        first_byte_mark = 0xF0; /* 11110000 */
    }
    else
    {
        /* invalid unicode codepoint */
        goto fail;
    }

    /* write the continuation bytes back to front, 6 bits at a time */
    for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--)
    {
        /* 10xxxxxx */
        (*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF);
        codepoint >>= 6;
    }
    /* encode first byte */
    if (utf8_length > 1)
    {
        (*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF);
    }
    else
    {
        (*output_pointer)[0] = (unsigned char)(codepoint & 0x7F);
    }

    *output_pointer += utf8_length;

    return sequence_length;

fail:
    return 0;
}
解析数字时,把数字的字符串复制出来,调用strtod函数
//解析数字static cJSON_bool parse_number(cJSON * const item, parse_buffer * const input_buffer){ double number = 0; unsigned char *after_end = NULL; unsigned char number_c_string[64]; //存放数字的字符串 unsigned char decimal_point = get_decimal_point(); size_t i = 0; if ((input_buffer == NULL) || (input_buffer->content == NULL)) { return false; } for (i = 0; (i < (sizeof(number_c_string) - 1)) && can_access_at_index(input_buffer, i); i++) { switch (buffer_at_offset(input_buffer)[i]) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '+': case '-': case 'e': case 'E': number_c_string[i] = buffer_at_offset(input_buffer)[i]; break; case '.': number_c_string[i] = decimal_point; break; default: goto loop_end; } }loop_end: number_c_string[i] = '\0'; //使用strtod解析数字,after_end返回解析完成后的下一个的字符的位置 11.22abc 解析后after_end指向a number = strtod((const char*)number_c_string, (char**)&after_end); if (number_c_string == after_end) //解析出错 { return false; /* parse_error */ } item->valuedouble = number; /* use saturation in case of overflow */ if (number >= INT_MAX) { item->valueint = INT_MAX; } else if (number <= (double)INT_MIN) { item->valueint = INT_MIN; } else { item->valueint = (int)number; //转为整数 } item->type = cJSON_Number; input_buffer->offset += (size_t)(after_end - number_c_string); return true;}
解析数组比较简单:遇到 `[` 开始解析;之后每个以 `,` 分隔的部分就是一个item;遇到 `]` 解析完毕。
/*
 * Parse a JSON array at the buffer's current offset.
 *
 * Elements are parsed with parse_value and chained into a doubly linked list
 * through their next/prev members. On success item becomes a cJSON_Array
 * whose child is the head of that list (NULL for an empty array) and the
 * offset is moved past the closing ']'.
 *
 * Returns false when the nesting limit CJSON_NESTING_LIMIT is reached, the
 * input is not a well-formed array, or an element could not be created or
 * parsed; any elements built so far are handed to cJSON_Delete.
 */
static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer)
{
    cJSON *head = NULL; /* head of the linked list */
    cJSON *current_item = NULL;

    if (input_buffer->depth >= CJSON_NESTING_LIMIT)
    {
        return false; /* to deeply nested */
    }
    input_buffer->depth++;

    /* bounds-check before reading the first byte, as parse_object does */
    if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '['))
    {
        /* not an array */
        goto fail;
    }

    input_buffer->offset++;
    buffer_skip_whitespace(input_buffer);
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ']'))
    {
        /* empty array */
        goto success;
    }

    /* check if we skipped to the end of the buffer */
    if (cannot_access_at_index(input_buffer, 0))
    {
        input_buffer->offset--;
        goto fail;
    }

    /* step back to character in front of the first element, so that every
     * loop iteration can start by stepping over one byte ('[' or ',') */
    input_buffer->offset--;
    /* loop through the comma separated array elements */
    do
    {
        /* allocate next item */
        cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks));
        if (new_item == NULL)
        {
            goto fail; /* allocation failure */
        }

        /* attach next item to list */
        if (head == NULL)
        {
            /* start the linked list */
            current_item = head = new_item;
        }
        else
        {
            /* add to the end and advance */
            current_item->next = new_item;
            new_item->prev = current_item;
            current_item = new_item;
        }

        /* parse next value */
        input_buffer->offset++;
        buffer_skip_whitespace(input_buffer);
        if (!parse_value(current_item, input_buffer))
        {
            goto fail; /* failed to parse value */
        }
        buffer_skip_whitespace(input_buffer);
    }
    while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ','));

    if (cannot_access_at_index(input_buffer, 0) || buffer_at_offset(input_buffer)[0] != ']')
    {
        goto fail; /* expected end of array */
    }

success:
    input_buffer->depth--;

    item->type = cJSON_Array;
    item->child = head;

    /* step over the closing ']' */
    input_buffer->offset++;

    return true;

fail:
    if (head != NULL)
    {
        cJSON_Delete(head);
    }

    return false;
}
解析对象和解析数组类似。
/*
 * Parse a JSON object at the buffer's current offset.
 *
 * Each member is parsed as a string (the name) followed by ':' and a value.
 * The name is first parsed into valuestring and then moved to the member's
 * string field before the value is parsed into the same item. Members are
 * chained into a doubly linked list through next/prev.
 *
 * On success item becomes a cJSON_Object whose child is the first member
 * (NULL for an empty object) and the offset is moved past the closing '}'.
 * Returns false when the nesting limit CJSON_NESTING_LIMIT is reached or the
 * input is malformed; members built so far are handed to cJSON_Delete.
 */
static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer)
{
    cJSON *first_member = NULL; /* head of the member list */
    cJSON *last_member = NULL;  /* member currently being filled in */

    if (input_buffer->depth >= CJSON_NESTING_LIMIT)
    {
        return false; /* to deeply nested */
    }
    input_buffer->depth++;

    if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '{'))
    {
        goto fail; /* not an object */
    }

    input_buffer->offset++;
    buffer_skip_whitespace(input_buffer);
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '}'))
    {
        goto success; /* empty object */
    }

    /* check if we skipped to the end of the buffer */
    if (cannot_access_at_index(input_buffer, 0))
    {
        input_buffer->offset--;
        goto fail;
    }

    /* step back to the character in front of the first member, so every
     * iteration below starts by stepping over one byte ('{' or ',') */
    input_buffer->offset--;

    /* loop through the comma separated members */
    for (;;)
    {
        cJSON *member = cJSON_New_Item(&(input_buffer->hooks));
        if (member == NULL)
        {
            goto fail; /* allocation failure */
        }

        /* append to the list */
        if (first_member == NULL)
        {
            first_member = member;
        }
        else
        {
            last_member->next = member;
            member->prev = last_member;
        }
        last_member = member;

        /* parse the name of the child */
        input_buffer->offset++;
        buffer_skip_whitespace(input_buffer);
        if (!parse_string(last_member, input_buffer))
        {
            goto fail; /* failed to parse name */
        }
        buffer_skip_whitespace(input_buffer);

        /* swap valuestring and string, because we parsed the name */
        last_member->string = last_member->valuestring;
        last_member->valuestring = NULL;

        if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != ':'))
        {
            goto fail; /* invalid object */
        }

        /* parse the value */
        input_buffer->offset++;
        buffer_skip_whitespace(input_buffer);
        if (!parse_value(last_member, input_buffer))
        {
            goto fail; /* failed to parse value */
        }
        buffer_skip_whitespace(input_buffer);

        /* another member follows only after a comma */
        if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != ','))
        {
            break;
        }
    }

    if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '}'))
    {
        goto fail; /* expected end of object */
    }

success:
    input_buffer->depth--;

    item->type = cJSON_Object;
    item->child = first_member;

    /* step over the closing '}' */
    input_buffer->offset++;
    return true;

fail:
    if (first_member != NULL)
    {
        cJSON_Delete(first_member);
    }

    return false;
}
转载地址:http://utmxb.baihongyu.com/