- 论坛徽章:
- 145
|
本帖最后由 jason680 于 2017-04-06 08:11 编辑
回复 1# bmne
实战:
a.txt≈1.39GB (1388888898 字节)
b.txt≈2.62MB (2615136 字节)
电脑内存=16GB
a.txt 大, b.txt 小, 所以:
1. 先读 b.txt, 把每一行存入数组
2. 再读 a.txt, 逐行与数组对比
----------------------------------------
$ ls -l a.txt b.txt
-rw-rw-r-- 1 jason jason 1388888898 Apr 6 07:13 a.txt
-rw-rw-r-- 1 jason jason 2615136 Apr 6 07:27 b.txt
$ wc -l a.txt b.txt
150000000 a.txt
435555 b.txt
150435555 total
$ awk -f comp_ba.awk b.txt a.txt
Read b.txt ...
Read a.txt ...
5.1% [==................................................] do nothing 01:49
^C
$ cat comp_ba.awk
# Record the start time once, before any input is read; bar() subtracts
# it from the current time to show the elapsed mm:ss.
BEGIN {
    _p["time_s"] = systime()
}
# bar(n, m) - redraw a one-line text progress bar on stdout.
#   n : current position (the caller passes the number of records read)
#   m : optional status text appended after the bar ("" to omit it)
# Reads the globals _p["total"] (the value that counts as 100%) and
# _p["time_s"] (start time, used for the elapsed mm:ss field).
# Prints a message and exits with status 1 if _p["total"] is unset or 0.
# p, d, t, s, sec and i are locals (awk's extra-parameter convention).
function bar(n, m,    p, d, t, s, sec, i) {
    if (_p["total"] == 0) {
        print "didnot set total(_p[\"total\"]) value for bar function"
        exit 1
    }
    t = _p["total"]
    p = n / t * 100
    d = int(p / 2)                  # 50 cells, so each cell stands for 2%
    s = sprintf("%5.1f%%", p)       # "%%" yields one literal percent sign
    s = s " ["
    # Dedicated counter i, so the caller-supplied n is not overwritten.
    for (i = 1; i <= 50; ++i) {
        s = s ((i <= d) ? "=" : ".")
    }
    s = s "]"
    if (m != "") {
        s = s " " m
    }
    sec = systime() - _p["time_s"]
    s = s sprintf(" %02d:%02d", int(sec / 60), sec % 60)
    # End with "\r" and no newline so the next call overwrites this line.
    printf("%s\r", s)
}
# Announce each input file as its first record is read.
FNR == 1 {
    printf(" Read %s ...%s", FILENAME, ORS)
}
# First input file only (NR equals FNR while it is being read):
# a[line] counts how many times each line occurs, b[line] is set to 1
# the first time a line is seen. "next" keeps these records away from
# the rules that follow.
# NOTE(review): if the first file is empty, NR == FNR also holds for the
# second file, so its lines would be loaded here instead.
NR == FNR {
    if (a[$0]++ == 0)
        b[$0] = 1
    next
}
# Runs on the first record of each file whose records reach this rule:
# ask wc(1) how many lines the file has and store the count in
# _p["total"], which bar() uses as its 100% mark.
FNR == 1 {
    # Single-quote the name for the shell (embedded quotes become '\'')
    # so spaces or metacharacters in FILENAME cannot break or alter the
    # command. Redirecting into wc makes it print only the count.
    _wc_file = FILENAME
    gsub(/'/, "'\\''", _wc_file)
    cmd = "wc -l < '" _wc_file "'"
    # Assign only after a successful read; on failure the total stays
    # unset and bar() reports it. Adding 0 forces a numeric value.
    if ((cmd | getline line) > 0) {
        _p["total"] = line + 0
    }
    # Close the pipe so it is released and the command is re-run rather
    # than reused if the same command string is issued again.
    close(cmd)
}
{
    # Refresh the progress bar once every 100 records of the current file.
    if (!(FNR % 100))
        bar(FNR, "do nothing")

    # Placeholder: put your own per-line processing here.
    # note: compare as strings
}
|
|