| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- import json
- import sys
def parse_log_file(file_path, labels):
    """Parse a labelled log file into a dictionary.

    The file is read line by line. A line that starts with ``<label>:``
    (for any entry of *labels*) is a labelled field. A line that is exactly
    ``Messages -`` (after stripping) switches the parser from the header
    section to the message section.

    Header section:
        Each labelled field is stored as ``data[label]``. Following lines
        that are neither a labelled field nor the ``Messages -`` marker are
        treated as continuation text and joined with single spaces.
        Unlabelled lines that do not follow a labelled field are ignored.

    Message section:
        Every labelled line closes the message collected so far (it is
        appended to ``data["Messages"]`` when it has at least one non-empty
        value) and starts a new one. A ``Message`` field absorbs the
        following unlabelled lines as continuation text. Any other
        non-blank unlabelled line is appended to the current message's
        ``"Message"`` entry.

    NOTE(review): because every labelled line starts a new message, fields
    such as ``From:`` / ``Date:`` end up in separate entries. This mirrors
    the existing behaviour; confirm the intended record boundary against a
    real log before changing it.

    Args:
        file_path: Path of the log file to read.
        labels: Iterable of label strings (without the trailing colon).

    Returns:
        A dict with one key per header field found, plus ``"Messages"``,
        a list of dicts describing the parsed messages.
    """
    import re  # function-local, so the block does not rely on file-level imports

    with open(file_path, 'r') as f:
        # Strip once up front so the main loop and the look-ahead loops see
        # exactly the same text (indented labels are recognised in both).
        lines = [raw.strip() for raw in f]

    alternation = "|".join(re.escape(label) for label in labels)
    # With no usable labels the pattern "():" would match any line starting
    # with ":"; use a never-matching pattern instead.
    label_pattern = re.compile(f"({alternation}):" if alternation else r"(?!)")

    messages = []
    data = {"Messages": messages}
    current_message = {}
    message_section = False
    total = len(lines)

    i = 0
    while i < total:
        line = lines[i]

        if line == "Messages -":
            message_section = True
            i += 1
            continue

        match = label_pattern.match(line)

        if not message_section:
            if match:
                key = match.group(1).strip()
                parts = [line[match.end():].strip()]
                # Absorb continuation lines up to the next label / marker.
                while (i + 1 < total
                       and lines[i + 1] != "Messages -"
                       and not label_pattern.match(lines[i + 1])):
                    parts.append(lines[i + 1])
                    i += 1
                # Dropping empty pieces avoids stray leading/trailing/double
                # spaces caused by blank lines or an empty first value.
                data[key] = " ".join(part for part in parts if part)
        elif match:
            if current_message:
                # A new label closes the message collected so far; it is
                # kept only when it carries at least one non-empty value.
                if any(current_message.values()):
                    messages.append(current_message)
                current_message = {}

            key = match.group(1).strip()
            value_part = line[match.end():].strip()

            if key == "Message":
                # Multi-line message body: read until the next labelled line.
                parts = [value_part]
                while i + 1 < total and not label_pattern.match(lines[i + 1]):
                    parts.append(lines[i + 1])
                    i += 1
                current_message[key] = " ".join(part for part in parts if part)
            else:
                current_message[key] = value_part
        elif line:
            # Free text without a "Message:" label is folded into the
            # current message's body.
            if "Message" in current_message:
                current_message["Message"] += " " + line
            else:
                current_message["Message"] = line

        i += 1

    # Add the last message.
    if current_message and any(current_message.values()):
        messages.append(current_message)

    # A header field literally named "Messages" would have replaced the list
    # above; the parsed messages always take precedence.
    data["Messages"] = messages
    return data
if __name__ == "__main__":
    # Script entry point: parse the log file named on the command line using
    # the labels listed in ./data_labels.json, then print the result as JSON.
    if len(sys.argv) < 2:
        # Exit with a readable usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <log_file>")
    log_file_path = sys.argv[1]

    # The label list is read from the current working directory.
    with open('data_labels.json', 'r') as f:
        labels = json.load(f)
    parsed_data = parse_log_file(log_file_path, labels)
    print(json.dumps(parsed_data, indent=4))
|