根据指定键对自定义 JSON 输出

news2025/4/8 3:18:42

要根据指定键对自定义 JSON 进行输出，通常的做法是：

解析 JSON 数据。
按照用户给定的键提取或排序数据。
重新构造并输出 JSON 数据。

这里有几个常见的场景：

提取特定键及其值。
按特定键排序 JSON 数组。
过滤掉不需要的键。

接下来，我们将用 Python 演示这些操作，结合 json 模块解析和处理 JSON 数据。

在这里插入图片描述

问题背景

在使用 simplejson 库将 Python 字典转换为 JSON 时，希望为某些特定的键对自定义输出。例如，需要 callback 和 scope 这两个键的值不包含双引号，以便 JavaScript 能够解析数据，而不是将其视为字符串。

解决方案

方法一：修改 JSON 源代码

导入必要的模块。

import json
from json.encoder import encode_basestring_ascii, encode_basestring, FLOAT_REPR, INFINITY, c_make_encoder

创建自定义键类 JsonSpecialKey。

class JsonSpecialKey(object):
    def __init__(self, data):
        self.data = data

创建自定义 JSON 编码器 JsonSpecialEncoder。

class JsonSpecialEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, JsonSpecialKey):
            return obj.data
        return json.JSONEncoder.default(self, obj)

使用自定义编码器对数据进行编码。

d = {'testKey': JsonSpecialKey('function() {alert(123);}')}
print(json.dumps(d, cls=JsonSpecialEncoder, ensure_ascii=False, indent=4))

方法二：使用预定义的 JSON 编码器

一些 JSON 编码器库提供了更简单的自定义输出方式。例如，ujson 库允许您通过添加 @ 符号来指定需要排除双引号的键。

安装 ujson 库。

pip install ujson

导入必要的模块。

import ujson

使用自定义编码器对数据进行编码。

d = {'testKey': 'function() {alert(123);}'}
print(ujson.dumps(d, double_quote=False, escape_forward_slashes=False))

代码示例

# 方法一：修改 JSON 源代码
import json
from json.encoder import encode_basestring_ascii, encode_basestring, FLOAT_REPR, INFINITY, c_make_encoder

class JsonSpecialKey(object):
    def __init__(self, data):
        self.data = data

def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
    ):

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif isinstance(value, JsonSpecialKey):
                yield buf + value.data

            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif isinstance(value, JsonSpecialKey):
                yield value.data
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, JsonSpecialKey):
            yield o.data
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid