- 论坛徽章:
- 0
|
<br>
当使用hadoop成为web日志或生产日志的存储池后,我们可以使用./hive登录到hive shell中<br>
进行查询操作。但为了方便,在查询方面用python进行封装好相应方法进行使用,用java进行我们的统计分析。<br>
<br>
使用hive客户端的相关功能只需要将${hive_home}/lib/py路径加入到python的运行环境路径中就行了。<br>
如以下脚本:<br>
<br>
<br>
#!/usr/bin/env python<br>
#-*- coding: utf-8 -*-<br>
<br>
import os,sys<br>
<br>
# Directory that contains this script (symlinks resolved). Named script_dir
# rather than `dir` so the builtin dir() is not shadowed.
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))

# Make the Python-for-Hive client libraries under <script dir>/lib/py
# importable, ahead of anything else on the path.
sys.path.insert(0, os.path.join(script_dir, 'lib', 'py'))
<br>
<br>
from hive_service import ThriftHive<br>
from hive_service.ttypes import HiveServerException<br>
from thrift import Thrift<br>
from thrift.transport import TSocket<br>
from thrift.transport import TTransport<br>
from thrift.protocol import TBinaryProtocol<br>
<br>
<br>
def hiveExe(sql):
    """Execute *sql* on the Hive Thrift server and print one fetched result.

    Opens a buffered Thrift transport to 192.168.237.13:10000, runs the
    statement through ``ThriftHive.Client`` and prints the value returned by
    ``client.fetchOne()``. Any ``Thrift.TException`` is caught and its
    message printed instead of propagating.

    Args:
        sql: The HiveQL statement to execute.

    Returns:
        None. All output is written to stdout.
    """
    try:
        sock = TSocket.TSocket('192.168.237.13', 10000)
        transport = TTransport.TBufferedTransport(sock)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ThriftHive.Client(protocol)
        transport.open()
        try:
            client.execute(sql)
            print("The return value is : " + client.fetchOne())
        finally:
            # Always release the connection, even when execute/fetchOne
            # raises; the original skipped close() on any error.
            transport.close()
    except Thrift.TException as tx:
        print('%s' % (tx.message))
<br>
if __name__ == '__main__':
    # hiveExe prints the fetched result itself and returns None, so call it
    # directly; printing its return value would only emit a stray "None".
    hiveExe("select * from img_ana limit 2")
|