convert_to_json.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. import json
  2. import sys
  3. def parse_log_file(file_path, labels):
  4. with open(file_path, 'r') as f:
  5. lines = f.readlines()
  6. data = {"Messages": []}
  7. message_section = False
  8. current_message = {}
  9. # Create a regex pattern to find any of the labels
  10. import re
  11. label_pattern = re.compile(f"({'|'.join(re.escape(label) for label in labels)}):")
  12. i = 0
  13. while i < len(lines):
  14. line = lines[i].strip()
  15. if line == "Messages -":
  16. message_section = True
  17. i += 1
  18. continue
  19. if not message_section:
  20. match = label_pattern.match(line)
  21. if match:
  22. key = match.group(1).strip()
  23. value = line[len(match.group(0)):].strip()
  24. # Handle multi-line values for the initial fields
  25. while i + 1 < len(lines) and not label_pattern.match(lines[i+1]) and lines[i+1].strip() != "Messages -":
  26. value += " " + lines[i+1].strip()
  27. i += 1
  28. data[key] = value
  29. else:
  30. if label_pattern.match(line):
  31. if current_message:
  32. # Before starting a new message, save the previous one if it exists
  33. if any(current_message.values()): # Save only if not empty
  34. data["Messages"].append(current_message)
  35. current_message = {}
  36. key = label_pattern.match(line).group(1).strip()
  37. value_part = line[len(label_pattern.match(line).group(0)):].strip()
  38. # Multi-line message content
  39. if key == "Message":
  40. message_content = [value_part]
  41. while i + 1 < len(lines) and not label_pattern.match(lines[i+1]):
  42. message_content.append(lines[i+1].strip())
  43. i += 1
  44. current_message[key] = " ".join(message_content).strip()
  45. else:
  46. current_message[key] = value_part
  47. elif line.strip() and not label_pattern.match(line):
  48. # This handles cases where a message might not have a "Message:" label
  49. # and is just free text between other labeled fields.
  50. if "Message" in current_message:
  51. current_message["Message"] += " " + line
  52. else:
  53. current_message["Message"] = line
  54. i += 1
  55. # Add the last message
  56. if current_message and any(current_message.values()):
  57. data["Messages"].append(current_message)
  58. return data
  59. if __name__ == "__main__":
  60. log_file_path = sys.argv[1]
  61. with open('data_labels.json', 'r') as f:
  62. labels = json.load(f)
  63. parsed_data = parse_log_file(log_file_path, labels)
  64. print(json.dumps(parsed_data, indent=4))