- 论坛徽章:
- 0
|
感谢各位的回复。当初发了帖子,好久没等到回复,于是就没再上论坛看了,劳烦各位大神辛苦帮忙了。我自己用R语言写了一份代码,不过效率没有awk高,R代码如下:
# Keep only the runs of consecutive rows that are long enough and contain
# enough marked rows.
#
# A new run starts at every row whose value in `value_col` exceeds the previous
# row's value by more than `max_step`. A run is kept when it has at least
# `min_len` rows AND at least `min_marks` of its rows have `mark` in
# `mark_col`. Every run is examined, including the first one in the table and
# the case where the whole table is a single run.
#
# Arguments:
#   dat        data frame to filter.
#   value_col  column (index or name) whose row-to-row difference splits runs.
#   mark_col   column (index or name) holding the marker values.
#   mark       marker value(s) to count.
#   min_len    minimum number of rows a run needs to be kept.
#   min_marks  minimum number of marked rows a run needs to be kept.
#   max_step   largest row-to-row increase that still continues a run.
#
# Returns: the rows of `dat` belonging to kept runs, in their original order
# (a zero-row data frame when no run qualifies).
select_runs <- function(dat, value_col = 12L, mark_col = 11L, mark = "M",
                        min_len = 4L, min_marks = 4L, max_step = 1) {
  n_rows <- nrow(dat)
  if (n_rows == 0L) {
    return(dat)
  }

  # An NA difference is treated as "no break" so it cannot poison the
  # cumulative sum below.
  is_break <- diff(dat[[value_col]]) > max_step
  is_break[is.na(is_break)] <- FALSE

  # Label each row with the index of its run: the first row opens run 1 and
  # every break opens the next one.
  run_id <- cumsum(c(TRUE, is_break))
  n_runs <- run_id[n_rows]

  # `%in%` never yields NA, so missing markers simply count as "not marked".
  is_marked <- dat[[mark_col]] %in% mark

  run_len <- tabulate(run_id, nbins = n_runs)
  mark_count <- tabulate(run_id[is_marked], nbins = n_runs)

  # Select all qualifying rows in one subsetting step instead of growing the
  # result with rbind() inside a loop.
  keep_run <- run_len >= min_len & mark_count >= min_marks
  dat[keep_run[run_id], , drop = FALSE]
}

dat <- read.table("test.txt", header = FALSE, stringsAsFactors = FALSE)
result <- select_runs(dat)
write.table(result, "result.txt", sep = " ", col.names = FALSE,
            row.names = FALSE, quote = FALSE)
|
|