jq处理文本合并json数据
不好意思各位大神,做一次伸手党 :lol: 从数据库中导出了如下文本数据,1.txt 的内容如下:
{"分类A": {"url": "1310222208d125e4b54e0fe2e2.jpg", "link": "123.html", "title": "题目a"}}
{"分类A": {"url": "1310222208ce4207dfc9be676d.jpg", "link": "454.html", "title": "题目b"}}
{"分类B": {"url": "13102222083e5d22e5f198bb64.jpg", "link": "333.html", "title": "题目c"}}
{"分类B": {"url": "1603222322b21be8b598485481.jpg", "link": "653.html", "title": "题目d"}}
{"分类C": {"url": "16032223224da681cbeb817221.jpg", "link": "778.html", "title": "题目e"}}
{"分类C": {"url": "16032223221e21022ca3390f04.jpg", "link": "999.html", "title": "题目f"}}
可以看到1.txt本身不符合json规范标准,但是每一条数据都是符合json规范的,想把“分类X”这个key进行去重复,它的值合并为数组,用[]包起来,想得到如下符合json规范标准的数据:
{
"分类A": [{
"url": "1310222208d125e4b54e0fe2e2.jpg",
"link": "123.html",
"title": "题目a"
},
{
"url": "1310222208ce4207dfc9be676d.jpg",
"link": "454.html",
"title": "题目b"
}
],
"分类B": [{
"url": "13102222083e5d22e5f198bb64.jpg",
"link": "333.html",
"title": "题目c"
},
{
"url": "1603222322b21be8b598485481.jpg",
"link": "653.html",
"title": "题目d"
}
],
"分类C": [{
"url": "16032223224da681cbeb817221.jpg",
"link": "778.html",
"title": "题目e"
},
{
"url": "16032223221e21022ca3390f04.jpg",
"link": "999.html",
"title": "题目f"
}
]
}
用shell或者jq都可以
本帖最后由 wh7211 于 2023-09-06 12:49 编辑
回复 1# opiopuiopoi
# Requires gawk (gensub). Splits each record on braces: $2 is the `"key": ` prefix, $3 the inner
# object's fields. Values are buffered per key and flushed when a new key appears; the gensub
# passes in END then insert newlines and indentation. a4/a8/a11 are indent strings of 4, 8 and
# 11 spaces (11 + the literal space in "{ " yields 12-column field indentation).
awk -F"[{}]" 'BEGIN{a4="    ";a8="        ";a11="           "}!b[$2]++{if(c){e=c"["d"],";f=f?f"\n"e:e;c=d=""}}{c=a4""$2;d=d?d",{ "$3"}":"{ "$3"}"}END{if(c){e=c"["d"]";f=f?f"\n"e:e;f=gensub(/({|",)/,"\\1\n"a11"","g",f);f=gensub(/(},)/,"\n"a8"\\1\n"a8"","g",f);f=gensub(/(}](,|))/,"\n"a8"\\1","g",f);f=gensub(/(](,|))/,"\n"a4"\\1","g",f);print "{\n"f"\n}"}}' 1
# Splits each record on runs of braces/spaces: $2 is the `"key":` token, odd fields from $3 are
# member names and even fields their values. Each record is rebuilt as an indented object and
# buffered in e; the buffer is printed as an array whenever a new key appears and again in END.
# a4/a8/a12 are indent strings of 4, 8 and 12 spaces.
awk -F"[{ }]+" 'BEGIN{a4="    ";a8="        ";a12="            ";print "{"}!b[$2]++{if(d){print d" ["e"\n"a4"],";d=e=""}}{for(i=3;i<NF;i++){if(i%2==1){$i="\n"a12""$i" "}else if(i==NF-1){$i=$i"\n"a8};c=c?c""$i:$i};d=a4""$2;e=e?e",\n"a8"{"c"}":"{"c"}";c=""}END{if(d){print d" ["e"\n"a4"]\n}"}}' 1
# Requires gawk (3-argument match()). a[1] captures the `"key": ` prefix and a[2] the inner
# object. Objects are comma-joined in d until a previously unseen key arrives, at which point
# the buffered `key[objects],` is printed; jq then pretty-prints the assembled document.
awk 'BEGIN{print "{"}{match($0,"^{([^{]+)({[^}]+})}$",a);if(!b[a[1]]++&&c){print c"["d"],";c=d=""};c=a[1];d=d?d","a[2]:a[2]}END{if(c){print c"["d"]}"}}' 1|jq '.'
# cat 1.rb
#!/usr/bin/ruby
require 'json'
# Merge the newline-delimited JSON objects in 1.txt into one JSON object:
# values that share a top-level key are collected, in input order, into an
# array stored under that key.
rs = Hash.new { |hash, key| hash[key] = [] }
File.foreach("1.txt") do |line|
  next if line.strip.empty? # a blank line is not valid JSON; skip it

  JSON.parse(line).each do |k, v|
    # Index by the key: appending to the Hash itself raises NoMethodError.
    rs[k] << v
  end
end
# Empty indent plus newline separators gives one token group per line.
puts rs.to_json(:indent => "", :array_nl => "\n", :object_nl => "\n")
# ./1.rb
{
"分类A":[
{
"url":"1310222208d125e4b54e0fe2e2.jpg",
"link":"123.html",
"title":"题目a"
},
{
"url":"1310222208ce4207dfc9be676d.jpg",
"link":"454.html",
"title":"题目b"
}
],
"分类B":[
{
"url":"13102222083e5d22e5f198bb64.jpg",
"link":"333.html",
"title":"题目c"
},
{
"url":"1603222322b21be8b598485481.jpg",
"link":"653.html",
"title":"题目d"
}
],
"分类C":[
{
"url":"16032223224da681cbeb817221.jpg",
"link":"778.html",
"title":"题目e"
},
{
"url":"16032223221e21022ca3390f04.jpg",
"link":"999.html",
"title":"题目f"
}
]
}
本帖最后由 csccyab 于 2023-10-22 19:15 编辑
用 python 写了这个:
import json
from collections import defaultdict
# Group the newline-delimited JSON objects in 1.txt by their top-level key:
# each key maps to the list of values seen for it, in input order.
new_dict = defaultdict(list)
# "with" closes the file when done; UTF-8 is explicit because the keys are non-ASCII.
with open("1.txt", "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue  # a blank line is not valid JSON; skip it
        record = json.loads(line)
        # Append under the record's key: calling .append on the defaultdict
        # itself raises AttributeError.
        for key, value in record.items():
            new_dict[key].append(value)
# ensure_ascii=False keeps the Chinese keys and titles readable in the output.
print(json.dumps(new_dict, indent=4, ensure_ascii=False))
Output:
$ python3 jsonify.py
{
"分类A": [
{
"url": "1310222208d125e4b54e0fe2e2.jpg",
"link": "123.html",
"title": "题目a"
},
{
"url": "1310222208ce4207dfc9be676d.jpg",
"link": "454.html",
"title": "题目b"
}
],
"分类B": [
{
"url": "13102222083e5d22e5f198bb64.jpg",
"link": "333.html",
"title": "题目c"
},
{
"url": "1603222322b21be8b598485481.jpg",
"link": "653.html",
"title": "题目d"
}
],
"分类C": [
{
"url": "16032223224da681cbeb817221.jpg",
"link": "778.html",
"title": "题目e"
},
{
"url": "16032223221e21022ca3390f04.jpg",
"link": "999.html",
"title": "题目f"
}
]
}
页:
[1]