当前位置:   article > 正文

python连接hive_python hive

python hive

环境 python 2.7.18

pyhive

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. https://www.cnblogs.com/hider/p/16279747.html
  5. # Linux系统
  6. pip install sasl
  7. pip install thrift
  8. pip install thrift-sasl
  9. pip install PyHive
  10. pip install pandas
  11. # Windows系统会出现莫名其妙的报错
  12. """
  13. from pyhive import hive
  14. import pandas as pd
  15. # def read_jdbc(host, port, database: str, table: str, query_sql: str) -> DataFrame:
  16. def read_jdbc(host, port, database, table, query_sql) :
  17. # 1、连接hive服务端
  18. conn = hive.Connection(
  19. host=host, port=10000, database=database)
  20. cursor = conn.cursor()
  21. # logger.info('connect hive successfully.')
  22. # 2、执行hive sql
  23. cursor.execute(query_sql)
  24. # logger.info('query hive table successfully.')
  25. # 3、返回pandas.dataframe
  26. table_len = len(table) + 1
  27. columns = [col[0] for col in cursor.description]
  28. col = list(map(lambda x: x[table_len:], columns))
  29. result = cursor.fetchall()
  30. # 关闭连接 释放资源
  31. cursor.close()
  32. conn.close()
  33. return pd.DataFrame(result, columns=col)
  34. if __name__ == "__main__":
  35. # print(__name__)
  36. host = '***'
  37. port = '10000'
  38. database = 'tdxdb'
  39. table = ''
  40. query_sql = 'select * from tdxdb.t_enterprise limit 2'
  41. rs = read_jdbc(host, port, database, table, query_sql)
  42. print(rs)

impala

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. pip install pure-sasl
  5. pip install thrift_sasl==0.2.1 --no-deps
  6. pip install thrift==0.9.3
  7. pip install thriftpy
  8. pip install impyla
  9. # Windows系统会出现莫名其妙的报错
  10. """
  11. from impala.dbapi import connect
  12. conn = connect(host='***', port=10000, auth_mechanism='PLAIN', user='hdfs', database='default')
  13. cursor = conn.cursor()
  14. cursor.execute('show tables')
  15. for row in cursor:
  16. print(row)
  17. # 关闭连接
  18. cursor.close()
  19. conn.close()

HiveServer2 

  1. # 使用 Python 代码来连接 HiveServer2
  2. https://www.tencentcloud.com/zh/document/product/1026/31148
  3. pip install pyhs2
  4. 暂未成功
  5. compilation terminated.
  6. error: command 'gcc' failed with exit status 1
  7. ----------------------------------------
  8. ERROR: Command errored out with exit status 1: /usr/bin/python -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-0YDrAQ/sasl/setup.py'"'"'; __file__='"'"'/tmp/pip-install-0YDrAQ/sasl/setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' install --record /tmp/pip-record-ukxUok/install-record.txt --single-version-externally-managed --compile --install-headers /usr/local/python2.7.18/include/python2.7/sasl Check the logs for full command output.
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Li_阴宅/article/detail/855526
推荐阅读
相关标签
  

闽ICP备14008679号