| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- syntax = "proto3";
- package worker_pb;
- option go_package = "github.com/seaweedfs/seaweedfs/weed/pb/worker_pb";
- // WorkerService provides bidirectional communication between admin and worker
- service WorkerService {
- // WorkerStream maintains a bidirectional stream for worker communication
- rpc WorkerStream(stream WorkerMessage) returns (stream AdminMessage);
- }
- // WorkerMessage represents messages from worker to admin
- message WorkerMessage {
- string worker_id = 1;
- int64 timestamp = 2;
-
- oneof message {
- WorkerRegistration registration = 3;
- WorkerHeartbeat heartbeat = 4;
- TaskRequest task_request = 5;
- TaskUpdate task_update = 6;
- TaskComplete task_complete = 7;
- WorkerShutdown shutdown = 8;
- TaskLogResponse task_log_response = 9;
- }
- }
- // AdminMessage represents messages from admin to worker
- message AdminMessage {
- string admin_id = 1;
- int64 timestamp = 2;
-
- oneof message {
- RegistrationResponse registration_response = 3;
- HeartbeatResponse heartbeat_response = 4;
- TaskAssignment task_assignment = 5;
- TaskCancellation task_cancellation = 6;
- AdminShutdown admin_shutdown = 7;
- TaskLogRequest task_log_request = 8;
- }
- }
- // WorkerRegistration message when worker connects
- message WorkerRegistration {
- string worker_id = 1;
- string address = 2;
- repeated string capabilities = 3;
- int32 max_concurrent = 4;
- map<string, string> metadata = 5;
- }
- // RegistrationResponse confirms worker registration
- message RegistrationResponse {
- bool success = 1;
- string message = 2;
- string assigned_worker_id = 3;
- }
- // WorkerHeartbeat sent periodically by worker
- message WorkerHeartbeat {
- string worker_id = 1;
- string status = 2;
- int32 current_load = 3;
- int32 max_concurrent = 4;
- repeated string current_task_ids = 5;
- int32 tasks_completed = 6;
- int32 tasks_failed = 7;
- int64 uptime_seconds = 8;
- }
- // HeartbeatResponse acknowledges heartbeat
- message HeartbeatResponse {
- bool success = 1;
- string message = 2;
- }
- // TaskRequest from worker asking for new tasks
- message TaskRequest {
- string worker_id = 1;
- repeated string capabilities = 2;
- int32 available_slots = 3;
- }
- // TaskAssignment from admin to worker
- message TaskAssignment {
- string task_id = 1;
- string task_type = 2;
- TaskParams params = 3;
- int32 priority = 4;
- int64 created_time = 5;
- map<string, string> metadata = 6;
- }
- // TaskParams contains task-specific parameters with typed variants
- message TaskParams {
- string task_id = 1; // ActiveTopology task ID for lifecycle management
- uint32 volume_id = 2; // Primary volume ID for the task
- string collection = 3; // Collection name
- string data_center = 4; // Primary data center
- string rack = 5; // Primary rack
- uint64 volume_size = 6; // Original volume size in bytes for tracking size changes
-
- // Unified source and target arrays for all task types
- repeated TaskSource sources = 7; // Source locations (volume replicas, EC shards, etc.)
- repeated TaskTarget targets = 8; // Target locations (destinations, new replicas, etc.)
-
- // Typed task parameters
- oneof task_params {
- VacuumTaskParams vacuum_params = 9;
- ErasureCodingTaskParams erasure_coding_params = 10;
- BalanceTaskParams balance_params = 11;
- ReplicationTaskParams replication_params = 12;
- }
- }
- // VacuumTaskParams for vacuum operations
- message VacuumTaskParams {
- double garbage_threshold = 1; // Minimum garbage ratio to trigger vacuum
- bool force_vacuum = 2; // Force vacuum even if below threshold
- int32 batch_size = 3; // Number of files to process per batch
- string working_dir = 4; // Working directory for temporary files
- bool verify_checksum = 5; // Verify file checksums during vacuum
- }
- // ErasureCodingTaskParams for EC encoding operations
- message ErasureCodingTaskParams {
- uint64 estimated_shard_size = 1; // Estimated size per shard
- int32 data_shards = 2; // Number of data shards (default: 10)
- int32 parity_shards = 3; // Number of parity shards (default: 4)
- string working_dir = 4; // Working directory for EC processing
- string master_client = 5; // Master server address
- bool cleanup_source = 6; // Whether to cleanup source volume after EC
- }
- // TaskSource represents a unified source location for any task type
- message TaskSource {
- string node = 1; // Source server address
- uint32 disk_id = 2; // Source disk ID
- string rack = 3; // Source rack for tracking
- string data_center = 4; // Source data center for tracking
- uint32 volume_id = 5; // Volume ID (for volume operations)
- repeated uint32 shard_ids = 6; // Shard IDs (for EC shard operations)
- uint64 estimated_size = 7; // Estimated size to be processed
- }
- // TaskTarget represents a unified target location for any task type
- message TaskTarget {
- string node = 1; // Target server address
- uint32 disk_id = 2; // Target disk ID
- string rack = 3; // Target rack for tracking
- string data_center = 4; // Target data center for tracking
- uint32 volume_id = 5; // Volume ID (for volume operations)
- repeated uint32 shard_ids = 6; // Shard IDs (for EC shard operations)
- uint64 estimated_size = 7; // Estimated size to be created
- }
- // BalanceTaskParams for volume balancing operations
- message BalanceTaskParams {
- bool force_move = 1; // Force move even with conflicts
- int32 timeout_seconds = 2; // Operation timeout
- }
- // ReplicationTaskParams for adding replicas
- message ReplicationTaskParams {
- int32 replica_count = 1; // Target replica count
- bool verify_consistency = 2; // Verify replica consistency after creation
- }
- // TaskUpdate reports task progress
- message TaskUpdate {
- string task_id = 1;
- string worker_id = 2;
- string status = 3;
- float progress = 4;
- string message = 5;
- map<string, string> metadata = 6;
- }
- // TaskComplete reports task completion
- message TaskComplete {
- string task_id = 1;
- string worker_id = 2;
- bool success = 3;
- string error_message = 4;
- int64 completion_time = 5;
- map<string, string> result_metadata = 6;
- }
- // TaskCancellation from admin to cancel a task
- message TaskCancellation {
- string task_id = 1;
- string reason = 2;
- bool force = 3;
- }
- // WorkerShutdown notifies admin that worker is shutting down
- message WorkerShutdown {
- string worker_id = 1;
- string reason = 2;
- repeated string pending_task_ids = 3;
- }
- // AdminShutdown notifies worker that admin is shutting down
- message AdminShutdown {
- string reason = 1;
- int32 graceful_shutdown_seconds = 2;
- }
- // ========== Task Log Messages ==========
- // TaskLogRequest requests logs for a specific task
- message TaskLogRequest {
- string task_id = 1;
- string worker_id = 2;
- bool include_metadata = 3; // Include task metadata
- int32 max_entries = 4; // Maximum number of log entries (0 = all)
- string log_level = 5; // Filter by log level (INFO, WARNING, ERROR, DEBUG)
- int64 start_time = 6; // Unix timestamp for start time filter
- int64 end_time = 7; // Unix timestamp for end time filter
- }
- // TaskLogResponse returns task logs and metadata
- message TaskLogResponse {
- string task_id = 1;
- string worker_id = 2;
- bool success = 3;
- string error_message = 4;
- TaskLogMetadata metadata = 5;
- repeated TaskLogEntry log_entries = 6;
- }
- // TaskLogMetadata contains metadata about task execution
- message TaskLogMetadata {
- string task_id = 1;
- string task_type = 2;
- string worker_id = 3;
- int64 start_time = 4;
- int64 end_time = 5;
- int64 duration_ms = 6;
- string status = 7;
- float progress = 8;
- uint32 volume_id = 9;
- string server = 10;
- string collection = 11;
- string log_file_path = 12;
- int64 created_at = 13;
- map<string, string> custom_data = 14;
- }
- // TaskLogEntry represents a single log entry
- message TaskLogEntry {
- int64 timestamp = 1;
- string level = 2;
- string message = 3;
- map<string, string> fields = 4;
- float progress = 5;
- string status = 6;
- }
- // ========== Maintenance Configuration Messages ==========
- // MaintenanceConfig holds configuration for the maintenance system
- message MaintenanceConfig {
- bool enabled = 1;
- int32 scan_interval_seconds = 2; // How often to scan for maintenance needs
- int32 worker_timeout_seconds = 3; // Worker heartbeat timeout
- int32 task_timeout_seconds = 4; // Individual task timeout
- int32 retry_delay_seconds = 5; // Delay between retries
- int32 max_retries = 6; // Default max retries for tasks
- int32 cleanup_interval_seconds = 7; // How often to clean up old tasks
- int32 task_retention_seconds = 8; // How long to keep completed/failed tasks
- MaintenancePolicy policy = 9;
- }
- // MaintenancePolicy defines policies for maintenance operations
- message MaintenancePolicy {
- map<string, TaskPolicy> task_policies = 1; // Task type -> policy mapping
- int32 global_max_concurrent = 2; // Overall limit across all task types
- int32 default_repeat_interval_seconds = 3; // Default seconds if task doesn't specify
- int32 default_check_interval_seconds = 4; // Default seconds for periodic checks
- }
- // TaskPolicy represents configuration for a specific task type
- message TaskPolicy {
- bool enabled = 1;
- int32 max_concurrent = 2;
- int32 repeat_interval_seconds = 3; // Seconds to wait before repeating
- int32 check_interval_seconds = 4; // Seconds between checks
-
- // Typed task-specific configuration (replaces generic map)
- oneof task_config {
- VacuumTaskConfig vacuum_config = 5;
- ErasureCodingTaskConfig erasure_coding_config = 6;
- BalanceTaskConfig balance_config = 7;
- ReplicationTaskConfig replication_config = 8;
- }
- }
- // Task-specific configuration messages
- // VacuumTaskConfig contains vacuum-specific configuration
- message VacuumTaskConfig {
- double garbage_threshold = 1; // Minimum garbage ratio to trigger vacuum (0.0-1.0)
- int32 min_volume_age_hours = 2; // Minimum age before vacuum is considered
- int32 min_interval_seconds = 3; // Minimum time between vacuum operations on the same volume
- }
- // ErasureCodingTaskConfig contains EC-specific configuration
- message ErasureCodingTaskConfig {
- double fullness_ratio = 1; // Minimum fullness ratio to trigger EC (0.0-1.0)
- int32 quiet_for_seconds = 2; // Minimum quiet time before EC
- int32 min_volume_size_mb = 3; // Minimum volume size for EC
- string collection_filter = 4; // Only process volumes from specific collections
- }
- // BalanceTaskConfig contains balance-specific configuration
- message BalanceTaskConfig {
- double imbalance_threshold = 1; // Threshold for triggering rebalancing (0.0-1.0)
- int32 min_server_count = 2; // Minimum number of servers required for balancing
- }
- // ReplicationTaskConfig contains replication-specific configuration
- message ReplicationTaskConfig {
- int32 target_replica_count = 1; // Target number of replicas
- }
- // ========== Task Persistence Messages ==========
- // MaintenanceTaskData represents complete task state for persistence
- message MaintenanceTaskData {
- string id = 1;
- string type = 2;
- string priority = 3;
- string status = 4;
- uint32 volume_id = 5;
- string server = 6;
- string collection = 7;
- TaskParams typed_params = 8;
- string reason = 9;
- int64 created_at = 10;
- int64 scheduled_at = 11;
- int64 started_at = 12;
- int64 completed_at = 13;
- string worker_id = 14;
- string error = 15;
- double progress = 16;
- int32 retry_count = 17;
- int32 max_retries = 18;
- // Enhanced fields for detailed task tracking
- string created_by = 19;
- string creation_context = 20;
- repeated TaskAssignmentRecord assignment_history = 21;
- string detailed_reason = 22;
- map<string, string> tags = 23;
- TaskCreationMetrics creation_metrics = 24;
- }
- // TaskAssignmentRecord tracks worker assignments for a task
- message TaskAssignmentRecord {
- string worker_id = 1;
- string worker_address = 2;
- int64 assigned_at = 3;
- int64 unassigned_at = 4; // Optional: when worker was unassigned
- string reason = 5; // Reason for assignment/unassignment
- }
- // TaskCreationMetrics tracks why and how a task was created
- message TaskCreationMetrics {
- string trigger_metric = 1; // Name of metric that triggered creation
- double metric_value = 2; // Value that triggered creation
- double threshold = 3; // Threshold that was exceeded
- VolumeHealthMetrics volume_metrics = 4; // Volume health at creation time
- map<string, string> additional_data = 5; // Additional context data
- }
- // VolumeHealthMetrics captures volume state at task creation
- message VolumeHealthMetrics {
- uint64 total_size = 1;
- uint64 used_size = 2;
- uint64 garbage_size = 3;
- double garbage_ratio = 4;
- int32 file_count = 5;
- int32 deleted_file_count = 6;
- int64 last_modified = 7;
- int32 replica_count = 8;
- bool is_ec_volume = 9;
- string collection = 10;
- }
- // TaskStateFile wraps task data with metadata for persistence
- message TaskStateFile {
- MaintenanceTaskData task = 1;
- int64 last_updated = 2;
- string admin_version = 3;
- }
|