import json

# Sample department tree as a JSON document.  Every node has the keys
# "id", "label" and "child" (a list of nodes of the same shape).  The
# literal is split into adjacent string segments that follow the nesting;
# Python joins them into one string at compile time.
# NOTE(review): the id "S31" is used by two sibling nodes in this sample.
str_json = (
    '{"id":"S0","label":"部门","child":['
    '{"id":"S1","label":"管理层","child":[]},'
    '{"id":"S2","label":"人力资源部","child":['
    '{"id":"S21","label":"招聘组","child":['
    '{"id":"S210","label":"上海组","child":['
    '{"id":"S2101","label":"上海其他","child":[]},'
    '{"id":"S2102","label":"陆家嘴","child":[]}'
    ']},'
    '{"id":"S211","label":"深圳组","child":[]},'
    '{"id":"S212","label":"北京组","child":[]}'
    ']},'
    '{"id":"S22","label":"考核组","child":[]},'
    '{"id":"S23","label":"制度组","child":[]}'
    ']},'
    '{"id":"S3","label":"行政管理部","child":['
    '{"id":"S31","label":"后勤组","child":[]},'
    '{"id":"S31","label":"食堂组","child":[]}'
    ']},'
    '{"id":"S4","label":"信息技术部","child":['
    '{"id":"S41","label":"运行部","child":['
    '{"id":"S411","label":"运维组","child":[]}'
    ']}'
    ']}'
    ']}'
)

# Parsed form of the document: nested dicts and lists.
js = json.loads(str_json)
以上只是个示例，实际中最深可能有 6 层。请问如何将上面的 js 转成如下 dataframe（id 列放在第一列也行）？求赐教。
id | |||||
---|---|---|---|---|---|
部门 | 管理层 | S1 | |||
部门 | 人力资源部 | 招聘组 | 上海组 | 上海其他 | S2101 |
部门 | 人力资源部 | 招聘组 | 上海组 | 陆家嘴 | S2102 |
部门 | 人力资源部 | 招聘组 | 深圳组 | S211 | |
部门 | 人力资源部 | 招聘组 | 北京组 | S212 | |
部门 | 人力资源部 | 考核组 | S22 | ||
部门 | 人力资源部 | 制度组 | S23 | ||
部门 | 行政管理部 | 后勤组 | S31 | ||
部门 | 行政管理部 | 食堂组 | S32 | ||
部门 | 行政管理部 | 设施组 | S33 | ||
部门 | 信息技术部 | 运行部 | 运维组 | S411 | |
1
Donahue 2022-01-04 22:19:17 +08:00
import json
from typing import List, Optional

import pandas as pd


class People:
    """One node of the department tree, together with the labels leading to it.

    Attributes:
        id: The node's ``id`` value taken from the JSON data.
        label: Labels from the root down to this node, this node's label last.
        child: ``People`` objects built from the node's ``child`` list.
    """

    def __init__(self, js_data, label_prefix: Optional[List[str]] = None):
        """Build this node and, recursively, every node below it.

        Args:
            js_data: Mapping with the keys ``id``, ``label`` and ``child``.
            label_prefix: Labels of the ancestors, root first; ``None`` for
                the root node.
        """
        self.id = js_data['id']
        # ``+`` creates a new list, so sibling nodes never share a path list.
        if label_prefix is None:
            self.label = [js_data['label']]
        else:
            self.label = label_prefix + [js_data['label']]
        self.child = [People(child_js, self.label) for child_js in js_data['child']]

    def get_label_i(self, i: int):
        """Return the label at depth ``i``, or ``''`` if the path is shorter."""
        if i >= len(self.label):
            return ''
        return self.label[i]


def get_all_instance(p: People) -> List[People]:
    """Return ``p`` followed by all of its descendants, depth-first, parents first."""
    result = [p]
    for child in p.child:
        result.extend(get_all_instance(child))
    return result


# NOTE(review): the id "S31" is used by two sibling nodes in this sample.
str_json = '{"id":"S0","label":"部门","child":[{"id":"S1","label":"管理层","child":[]},{"id":"S2","label":"人力资源部","child":[{"id":"S21","label":"招聘组","child":[{"id":"S210","label":"上海组","child":[{"id":"S2101","label":"上海其他","child":[]},{"id":"S2102","label":"陆家嘴","child":[]}]},{"id":"S211","label":"深圳组","child":[]},{"id":"S212","label":"北京组","child":[]}]},{"id":"S22","label":"考核组","child":[]},{"id":"S23","label":"制度组","child":[]}]},{"id":"S3","label":"行政管理部","child":[{"id":"S31","label":"后勤组","child":[]},{"id":"S31","label":"食堂组","child":[]}]},{"id":"S4","label":"信息技术部","child":[{"id":"S41","label":"运行部","child":[{"id":"S411","label":"运维组","child":[]}]}]}]}'
js = json.loads(str_json)

# Flatten the tree: one People object per node (inner nodes included).
root_instance = People(js)
all_instance = get_all_instance(root_instance)

df = pd.DataFrame()
df['id'] = [i.id for i in all_instance]

# The deepest node decides how many label columns the frame needs.
max_depth_people = max(all_instance, key=lambda x: len(x.label))
label_depth = len(max_depth_people.label)

# One column per tree level; nodes above that depth get '' in the column.
for label_num in range(label_depth):
    df[f"label{label_num}"] = [i.get_label_i(label_num) for i in all_instance]

print(df)
2
Donahue 2022-01-04 22:19:53 +08:00
ps: 不要吐槽代码写得烂 hh
|
3
Donahue 2022-01-04 22:24:38 +08:00
就是递归建立对象,然后把所有对象保存到列表里
|
4
fkdog 2022-01-04 22:29:42 +08:00
本质上就是树结构的遍历算法。
BFS 请用队列，DFS 请用递归或栈。 |
7
Donahue 2022-01-04 22:38:20 +08:00
|
8
Donahue 2022-01-04 22:40:46 +08:00
|