import json
import re
import sys

# Line that separates the header fields from the message records.
_MESSAGES_MARKER = "Messages -"


def _compile_label_pattern(labels):
    """Return a regex that matches ``<label>:`` at the start of a line."""
    alternatives = "|".join(re.escape(label) for label in labels if label)
    if not alternatives:
        # An empty alternation would compile to "():" and match every line
        # that starts with ":"; use a pattern that can never match instead.
        return re.compile(r"(?!)")
    return re.compile(f"({alternatives}):")


def _split_label(pattern, text):
    """Return ``(key, value)`` when *text* starts with a label, else None."""
    match = pattern.match(text)
    if match is None:
        return None
    return match.group(1).strip(), text[match.end():].strip()


def _collect_continuation(lines, index, pattern, stop_at_marker):
    """Gather the continuation lines that follow ``lines[index]``.

    Scanning stops at the next line that starts with a label or, when
    *stop_at_marker* is true, at the messages marker.  Candidate lines are
    stripped before testing, the same way the main loop strips them, so an
    indented label ends the continuation instead of being absorbed by it.
    Blank lines are consumed but contribute no text.

    Returns ``(parts, last_index)`` where *last_index* is the index of the
    last line consumed (equal to *index* when nothing was consumed).
    """
    parts = []
    while index + 1 < len(lines):
        candidate = lines[index + 1].strip()
        if pattern.match(candidate):
            break
        if stop_at_marker and candidate == _MESSAGES_MARKER:
            break
        if candidate:
            parts.append(candidate)
        index += 1
    return parts, index


def parse_log_file(file_path, labels):
    """Parse a labelled log file into a dictionary.

    Lines before the ``Messages -`` marker are header fields: each
    ``Label: value`` line (plus any unlabelled lines that follow it) is
    stored as ``data[label]``.  Unlabelled header lines that do not follow
    a label are ignored.

    Lines after the marker are message records collected in
    ``data["Messages"]``.  Every labelled line starts a new record; a
    ``Message`` label absorbs the unlabelled lines that follow it, and
    unlabelled text found elsewhere is appended to the current record's
    ``"Message"`` entry.  Records whose values are all empty are dropped.

    Args:
        file_path: Path of the log file to read.
        labels: Iterable of label strings (without the trailing colon).

    Returns:
        A dict with one entry per header field found plus a ``"Messages"``
        list of per-record dicts.

    Raises:
        OSError: If *file_path* cannot be opened or read.
    """
    with open(file_path, "r") as f:
        lines = f.readlines()

    label_pattern = _compile_label_pattern(labels)
    data = {"Messages": []}
    current_message = {}
    in_messages = False

    # An index loop is used because continuation handling consumes a
    # variable number of following lines.
    i = 0
    while i < len(lines):
        line = lines[i].strip()

        if line == _MESSAGES_MARKER:
            in_messages = True
            i += 1
            continue

        labelled = _split_label(label_pattern, line)

        if not in_messages:
            if labelled is not None:
                key, value = labelled
                extra, i = _collect_continuation(
                    lines, i, label_pattern, stop_at_marker=True
                )
                data[key] = " ".join([value, *extra]).strip()
        elif labelled is not None:
            key, value = labelled
            # A labelled line closes the record in progress; records with
            # no non-empty value are discarded rather than stored.
            if any(current_message.values()):
                data["Messages"].append(current_message)
            current_message = {}
            if key == "Message":
                extra, i = _collect_continuation(
                    lines, i, label_pattern, stop_at_marker=False
                )
                current_message[key] = " ".join([value, *extra]).strip()
            else:
                current_message[key] = value
        elif line:
            # Free text without a label is treated as message body.
            if "Message" in current_message:
                current_message["Message"] += " " + line
            else:
                current_message["Message"] = line

        i += 1

    # Flush the record still in progress at end of file.
    if any(current_message.values()):
        data["Messages"].append(current_message)

    return data


def main():
    """Parse the log file named on the command line and print it as JSON."""
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <log_file>")
    log_file_path = sys.argv[1]

    # NOTE(review): presumably a JSON array of label strings -- confirm
    # against the actual data_labels.json.
    with open('data_labels.json', 'r') as f:
        labels = json.load(f)

    parsed_data = parse_log_file(log_file_path, labels)
    print(json.dumps(parsed_data, indent=4))


if __name__ == "__main__":
    main()