赞
踩
输入文件在你每次点击评测的时候,平台会为你创建,无需你自己创建,只需要启动HDFS,编写python代码即可。
#! /usr/bin/python3 import sys def main(): for line in sys.stdin: line = line.strip() mapper(line) # 使用name,age分别表示姓名和年龄 def mapper(line): ########## begin ############ group = line.split('\\n') for people in group: if len(people.strip()) == 0: continue name, age = people.split(' ') ########### End ################# print("%s\t%s" % (name, age)) if __name__ == '__main__': main()
#! /usr/bin/python3 import sys from operator import itemgetter # 找出values的最大值,并按name\tmax_age的形式输出。 def reducer(k, values): ##### Begin ######### print("%s\t%s" % (k,max(values))) ##### End ######### ############################################# # mapper的输出经过分区处理后,数据行按照键排序; # 具有相同键的行排在一起; # hadoop-streaming不会合并相同键的各个值。 # 下面的代码将相同键的各个值放到同一个列表中, # 并调用reducer函数实现找出每个人的最大年龄并输出; # 输出格式为:姓名\年龄 ############################################# def main(): current_name = None ages = [] name, age = '', 0 for line in sys.stdin: line = line.strip() name, age = line.split('\t', 1) age = int(age) if current_name == name: ages.append(age) else: if current_name: reducer(current_name, ages) ages = [] ages.append(age) current_name = name # 不要忘记最后一个人 if current_name == name: reducer(current_name, ages) if __name__ == '__main__': main()
#! /usr/bin/python3 import sys def main(): for line in sys.stdin: line = line.strip() mapper(line) def mapper(line): ########## Begin ############### items = line.split('\\n') for item in items: key,value = item.split() print("%s\t%s" % (key,value)) ########### End ############# if __name__ == '__main__': main()
#! /usr/bin/python3 import sys def reducer(k, values): ############ Begin ################ value = sorted(list(set(values))) for v in value: print("%s\t%s" % (k,v)) ############ End ################ def main(): current_key = None values = [] akey, avalue = None, None for line in sys.stdin: line = line.strip() akey, avalue = line.split('\t') if current_key == akey: values.append(avalue) else: if current_key: reducer(current_key, values) values = [] values.append(avalue) current_key = akey if current_key == akey: reducer(current_key, values) if __name__ == '__main__': main()
#! /usr/bin/python3 import sys def mapper(line): ############### Begin ############ items = line.split("\\n") for item in items: child,parent = item.split() print("%s\t%s" % (child,"p-"+parent)) print("%s\t%s" % (parent,"c-"+child)) ############### End ############# def main(): for line in sys.stdin: line = line.strip() if line.startswith('child'): pass else: mapper(line) if __name__ == '__main__': main()
#! /usr/bin/python3 import sys def reducer(k, values): ############## Begin ################ grandc = [] grandp = [] for value in values: if value[:2] == 'c-': grandc.append(value[2:]) elif value[:2] == 'p-': grandp.append(value[2:]) for c in grandc: for p in grandp: print("%s\t%s" % (c,p)) ############## End ################# def main(): current_key = None values = [] akey, avalue = None, None print("grand_child\tgrand_parent") for line in sys.stdin: line = line.strip() try: akey, avalue = line.split('\t') except: continue if current_key == akey: values.append(avalue) else: if current_key: reducer(current_key, values) values = [] values.append(avalue) current_key = akey if current_key == akey: reducer(current_key, values) if __name__ == '__main__': main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。