赞
踩
如果要处理 pcap 文件,Python 有三个比较有名的库:dpkt、PyShark 和 Scapy。
(如果有传输层的话)
包含以下信息。
时间戳 Timestamp
二层 MAC (source, destination)
IP(source, destination)
len
ttl
DF
MF
offset
protocol
# coding:utf-8
"""
Print per-packet details (length, addressing info, ...) for each packet in a pcap.

https://dpkt.readthedocs.io/en/latest/print_packets.html
"""
import dpkt
import datetime
import socket
from dpkt.compat import compat_ord
from dpkt.ip import get_ip_proto_name


def mac_addr(address):
    """Render a raw MAC address as colon-separated, two-digit hex octets.

    Args:
        address: the MAC address in raw form (e.g. the six raw bytes
            01 02 03 04 05 06)
    Returns:
        str: printable MAC address such as '01:02:03:04:05:06'
    """
    octets = ['%02x' % compat_ord(octet) for octet in address]
    return ':'.join(octets)


def inet_to_str(inet):
    """Turn a packed network address into its textual form.

    Args:
        inet: packed inet network address
    Returns:
        str: printable IP address
    """
    # IPv4 is attempted first; a ValueError from that attempt triggers the
    # IPv6 conversion instead.
    try:
        text = socket.inet_ntop(socket.AF_INET, inet)
    except ValueError:
        text = socket.inet_ntop(socket.AF_INET6, inet)
    return text


def print_packets(pcap):
    """Walk a pcap and print a short summary of every packet.

    Args:
        pcap: dpkt pcap reader object (dpkt.pcap.Reader)
    """
    for timestamp, buf in pcap:
        # Timestamp, rendered as a UTC datetime.
        utc_time = datetime.datetime.utcfromtimestamp(timestamp)
        print('Timestamp: ', str(utc_time))

        # Layer 2: decode the Ethernet frame (source/destination MAC, ethertype).
        frame = dpkt.ethernet.Ethernet(buf)
        print('Ethernet Frame: ', mac_addr(frame.src), mac_addr(frame.dst), frame.type)

        # Only frames whose payload decoded as dpkt.ip.IP are summarised further.
        packet = frame.data
        if not isinstance(packet, dpkt.ip.IP):
            print('Non IP Packet type not supported %s\n' % packet.__class__.__name__)
            continue

        # The fragment flags and the fragment offset share the `off` field,
        # so each one is extracted with its own bitmask.
        frag_field = packet.off
        do_not_fragment = bool(frag_field & dpkt.ip.IP_DF)
        more_fragments = bool(frag_field & dpkt.ip.IP_MF)
        fragment_offset = frag_field & dpkt.ip.IP_OFFMASK
        protocol = get_ip_proto_name(packet.p)

        summary = (
            inet_to_str(packet.src),
            inet_to_str(packet.dst),
            packet.len,
            packet.ttl,
            do_not_fragment,
            more_fragments,
            fragment_offset,
            protocol,
        )
        print('IP: %s -> %s (len=%d ttl=%d DF=%d MF=%d offset=%d protocol=%s)\n' % summary)


if __name__ == '__main__':
    with open('pcap/2021_11_02_Idle.pcap', 'rb') as capture_file:
        print_packets(dpkt.pcap.Reader(capture_file))
如果只是想要所有packet的[src_IP, dst_IP, src_MAC, dst_MAC, Protocol, TimeStamp, Info]等信息,可以直接打开Wireshark导出
这个操作也可以用Pyshark库完成。
import asyncio
import os
from tqdm import tqdm, trange
import pyshark
import pandas as pd

filePath = 'pcap/nestcamPOWER_3.pcap'


def pcap2csv(filePath):
    """Export the timestamp, protocol and length of every packet to a CSV file.

    The capture is opened with PyShark in summary mode, each packet summary
    is split on spaces, and three columns are kept. The CSV path is derived
    from ``filePath`` by replacing ``pcap/`` with ``result/`` and swapping
    the file extension for ``.csv``.

    Args:
        filePath: path of the pcap file to convert.
    """
    print(filePath)

    # ProactorEventLoop only exists on Windows; fall back to a regular new
    # event loop on every other platform.
    make_loop = getattr(asyncio, "ProactorEventLoop", asyncio.new_event_loop)
    loop = make_loop()
    asyncio.set_event_loop(loop)

    cap = pyshark.FileCapture(filePath, only_summaries=True, eventloop=loop)
    try:
        # Pre-loading is slow (comparable to Wireshark opening the same
        # capture), but it makes len(cap) available for the progress bar.
        cap.load_packets()
        packetAmount = len(cap)

        data3 = []
        # The packets are already loaded, so this loop itself is fast.
        processBar = tqdm(cap, desc="Pcap Progress Bar ", total=packetAmount)
        for packet in processBar:
            # NOTE(review): relies on the space-separated layout of the
            # summary string produced by str(packet) - confirm against the
            # installed PyShark version.
            pItem = str(packet).split(" ")
            # 1 - timestamp (s)
            # 4 - highest protocol
            # 5 - length
            data3.append([pItem[1], pItem[4], pItem[5]])
    finally:
        # Always release the capture, even if loading or parsing failed.
        cap.close()

    dataframe = pd.DataFrame(columns=["TimeStamp", "Protocol", "Length"], data=data3)

    # Name of the CSV file to write: only the extension is replaced, so a
    # "pcap" substring elsewhere in the path is left untouched.
    csvBase, _ = os.path.splitext(filePath.replace("pcap/", "result/"))
    csvName = csvBase + ".csv"
    print(csvName)

    # Create the output directory on first use so to_csv does not fail.
    csvDir = os.path.dirname(csvName)
    if csvDir:
        os.makedirs(csvDir, exist_ok=True)
    dataframe.to_csv(csvName, index=False, sep=',')


if __name__ == '__main__':
    pcap2csv(filePath)
Usage: pcap2csv --pcap <input pcap file> --csv <output csv file>
pcap 中的每个数据包都呈现到 csv 文件的一行中。要提取的特定项目以及它们在 csv 中的呈现顺序在脚本的“render_csv_row”函数中进行了硬编码。另请注意,csv 中的分隔符是“|”字符,而不是逗号。
此脚本同时使用 PyShark (https://kiminewt.github.io/pyshark/) 和 Scapy 来完成其工作。PyShark是因为我们希望利用tshark强大的协议解码能力来生成CSV的“文本描述”字段(如“标准查询0xf3de A www.cisco.com”,“Client Hello”等),而Scapy则因为同时我们希望访问数据包的“有效载荷”部分(PyShark似乎无法提供这一点)。
#!/usr/bin/env python3
"""pcap2csv

Script to extract specific pieces of information from a pcap file and
render into a csv file.

Usage: <program name> --pcap <input pcap file> --csv <output csv file>

Each packet in the pcap is rendered into one row of the csv file.
The specific items to extract, and the order in which they are rendered
in the csv are hard-coded in the script, in the 'render_csv_row' function.
Also note that the separators in the csv are '|' characters, not commas.

This script uses *both* PyShark (https://kiminewt.github.io/pyshark/) and
Scapy to do its work. PyShark because we want to leverage tshark's powerful
protocol decoding ability to generate the "textual description" field of
the CSV, and Scapy because at the same time we want to access the "payload"
portion of the packet (PyShark seems to be unable to provide this).
"""

import argparse
import os.path
import sys

import pyshark
from scapy.utils import RawPcapReader
from scapy.layers.l2 import Ether
from scapy.layers.inet import IP, UDP, TCP

#--------------------------------------------------

def render_csv_row(pkt_sh, pkt_sc, fh_csv):
    """Write one packet entry into the CSV file.

    Args:
        pkt_sh: the PyShark representation of the packet
        pkt_sc: a 'bytes' representation of the packet as returned from
            scapy's RawPcapReader
        fh_csv: the csv file handle

    Returns:
        True when a row was written, False when the packet was ignored
        (not IPv4, not UDP/TCP, or the L4 header was not decoded).
    """
    ether_pkt_sc = Ether(pkt_sc)
    # Assuming Ethernet + IPv4 here; anything else is skipped.
    if ether_pkt_sc.type != 0x800 or not ether_pkt_sc.haslayer(IP):
        print('Ignoring non-IP packet')
        return False

    ip_pkt_sc = ether_pkt_sc[IP]
    proto = ip_pkt_sc.fields['proto']
    if proto == 17:
        l4_layer = UDP
        l4_proto_name = 'UDP'
    elif proto == 6:
        l4_layer = TCP
        l4_proto_name = 'TCP'
    else:
        # Currently not handling packets that are not UDP or TCP
        print('Ignoring non-UDP/TCP packet')
        return False

    # Indexing a packet with a layer that was not decoded raises IndexError.
    # Skip such packets (e.g. non-first IP fragments) instead of aborting
    # the whole conversion.
    if not ip_pkt_sc.haslayer(l4_layer):
        print('Ignoring {} packet without a decoded L4 header'.format(l4_proto_name))
        return False

    l4_pkt_sc = ip_pkt_sc[l4_layer]
    l4_payload_bytes = bytes(l4_pkt_sc.payload)
    l4_sport = l4_pkt_sc.sport
    l4_dport = l4_pkt_sc.dport

    # Each line of the CSV has this format:
    #   {0}  frame number
    #   {1}  time
    #   {2}  highest protocol
    #   {3}  L4 protocol
    #   {4}  text description
    #   {5}  src ip address
    #   {6}  src port
    #   {7}  dst ip address
    #   {8}  dst port
    #   {9}  total pkt length
    #   {10} L4 payload hexdump
    # Example:
    # 1|0.0|DNS(UDP)|Standard query 0xf3de A www.cisco.com|192.168.1.116:57922|1.1.1.1:53|73|f3de010000010000000000000377777705636973636f03636f6d0000010001
    fmt = '{0}|{1}|{2}({3})|{4}|{5}:{6}|{7}:{8}|{9}|{10}'

    print(fmt.format(pkt_sh.no,               # {0}
                     pkt_sh.time,             # {1}
                     pkt_sh.protocol,         # {2}
                     l4_proto_name,           # {3}
                     pkt_sh.info,             # {4}
                     pkt_sh.source,           # {5}
                     l4_sport,                # {6}
                     pkt_sh.destination,      # {7}
                     l4_dport,                # {8}
                     pkt_sh.length,           # {9}
                     l4_payload_bytes.hex()), # {10}
          file=fh_csv)

    return True
#--------------------------------------------------

def pcap2csv(in_pcap, out_csv):
    """Main entry function called from main to process the pcap and
    generate the csv file.

    This function walks over each packet in the pcap file, and for
    each packet invokes the render_csv_row() function to write one row
    of the csv.

    Args:
        in_pcap: name of the input pcap file (guaranteed to exist)
        out_csv: name of the output csv file (will be created)
    """
    # Open the pcap file with PyShark in "summary-only" mode, since this
    # is the mode where the brief textual description of the packet (e.g.
    # "Standard query 0xf3de A www.cisco.com", "Client Hello" etc.) are
    # made available.
    pcap_pyshark = pyshark.FileCapture(in_pcap, only_summaries=True)
    try:
        pcap_pyshark.load_packets()
        pcap_pyshark.reset()

        frame_num = 0
        ignored_packets = 0
        with open(out_csv, 'w') as fh_csv:
            # Open the pcap file with scapy's RawPcapReader, and iterate over
            # each packet. In each iteration get the PyShark packet as well,
            # and then call render_csv_row() with both representations to
            # generate the CSV row.
            for (pkt_scapy, _) in RawPcapReader(in_pcap):
                try:
                    pkt_pyshark = pcap_pyshark.next_packet()
                except StopIteration:
                    # Shouldn't happen because the RawPcapReader iterator
                    # should also exit before this happens.
                    break

                frame_num += 1
                if not render_csv_row(pkt_pyshark, pkt_scapy, fh_csv):
                    ignored_packets += 1
    finally:
        # Release the PyShark capture even if the conversion failed midway.
        pcap_pyshark.close()

    print('{} packets read, {} packets not written to CSV'.
          format(frame_num, ignored_packets))

#--------------------------------------------------

def command_line_args():
    """Helper called from main() to parse the command line arguments.

    Returns:
        The argparse namespace, with the required 'pcap' and 'csv' attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pcap', metavar='<input pcap file>',
                        help='pcap file to parse', required=True)
    parser.add_argument('--csv', metavar='<output csv file>',
                        help='csv file to create', required=True)
    args = parser.parse_args()
    return args

#--------------------------------------------------

def main():
    """Program main entry.

    Exits with status -1 when the input pcap does not exist or when the
    output csv already exists; otherwise runs the conversion.
    """
    args = command_line_args()

    if not os.path.exists(args.pcap):
        print('Input pcap file "{}" does not exist'.format(args.pcap),
              file=sys.stderr)
        sys.exit(-1)

    if os.path.exists(args.csv):
        print('Output csv file "{}" already exists, '
              'won\'t overwrite'.format(args.csv),
              file=sys.stderr)
        sys.exit(-1)

    pcap2csv(args.pcap, args.csv)

#--------------------------------------------------

if __name__ == '__main__':
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。