$taskConfig 任务配置 * @return void */ public function collect(DataSourceAdapterInterface $adapter, array $taskConfig): void { $this->taskConfig = $taskConfig; $taskId = $taskConfig['task_id'] ?? ''; $taskName = $taskConfig['name'] ?? '数据库同步'; LoggerHelper::logBusiness('database_sync_collection_started', [ 'task_id' => $taskId, 'task_name' => $taskName, ]); // 控制台直接输出一条提示,方便在启动时观察数据库同步任务是否真正开始执行 error_log("[DatabaseSyncHandler] 数据库同步任务已启动:task_id={$taskId}, task_name={$taskName}"); try { // 创建 DatabaseSyncService(使用任务配置中的源和目标数据源) $this->syncService = $this->createSyncService($taskConfig); // 获取要同步的数据库列表 $databases = $this->getDatabasesToSync($taskConfig); if (empty($databases)) { LoggerHelper::logBusiness('database_sync_no_databases', [ 'task_id' => $taskId, 'message' => '没有找到要同步的数据库', ]); return; } // 启动进度日志定时器(定期输出同步进度) $this->startProgressTimer($taskConfig); // 是否执行全量同步(从业务配置中获取) $businessConfig = $this->getBusinessConfig(); $fullSyncEnabled = $businessConfig['change_stream']['full_sync_on_start'] ?? false; if ($fullSyncEnabled) { // 执行全量同步 $this->performFullSync($databases, $taskConfig); } // 启动增量同步监听(Change Streams) $this->startIncrementalSync($databases, $taskConfig); } catch (\Throwable $e) { LoggerHelper::logError($e, [ 'component' => 'DatabaseSyncHandler', 'action' => 'collect', 'task_id' => $taskId, ]); throw $e; } } /** * 获取业务配置(从独立配置文件或使用默认值) * * @return array 业务配置 */ private function getBusinessConfig(): array { // 可以从独立配置文件读取,或使用默认值 // 这里使用默认值,业务逻辑统一在代码中管理 return [ // 数据库同步配置 'databases' => [], // 空数组表示同步所有数据库 'exclude_databases' => ['admin', 'local', 'config'], // 排除的系统数据库 'exclude_collections' => ['system.profile', 'system.js'], // 排除的系统集合 // Change Streams 配置 'change_stream' => [ 'batch_size' => 100, 'max_await_time_ms' => 1000, 'full_sync_on_start' => true, // 首次启动时是否执行全量同步 'full_sync_batch_size' => 1000, ], // 重试配置 'retry' => [ 'max_connect_retries' => 10, 'retry_interval' => 5, 'max_sync_retries' => 3, 'sync_retry_interval' => 2, ], // 性能配置 'performance' => [ 'concurrent_databases' => 5, 'concurrent_collections' => 10, 'batch_write_size' => 5000, // 为了让断点续传逻辑简单可靠,这里关闭集合级并行同步 // 后续如果需要再做更复杂的分片断点策略,可以重新打开 'enable_parallel_sync' => false, 'max_parallel_tasks_per_collection' => 4, 'documents_per_task' => 100000, ], // 监控配置 'monitoring' => [ 'log_sync' => true, 'log_detail' => false, 'stats_interval' => 10, // 每10秒输出一次进度日志 ], ]; } /** * 创建 DatabaseSyncService 实例 * * @param array $taskConfig 任务配置 * @return DatabaseSyncService */ private function createSyncService(array $taskConfig): DatabaseSyncService { // 从数据库获取源和目标数据源配置 $dataSourceService = new DataSourceService(new DataSourceRepository()); $sourceDataSourceId = $taskConfig['source_data_source'] ?? 'kr_mongodb'; $targetDataSourceId = $taskConfig['target_data_source'] ?? 'sync_mongodb'; $sourceConfig = $dataSourceService->getDataSourceConfigById($sourceDataSourceId); $targetConfig = $dataSourceService->getDataSourceConfigById($targetDataSourceId); if (empty($sourceConfig) || empty($targetConfig)) { throw new \InvalidArgumentException("数据源配置不存在: source={$sourceDataSourceId}, target={$targetDataSourceId}"); } // 获取业务配置(统一在代码中管理) $businessConfig = $this->getBusinessConfig(); // 构建同步配置 $syncConfig = [ 'enabled' => true, 'source' => [ 'host' => $sourceConfig['host'], 'port' => $sourceConfig['port'], 'username' => $sourceConfig['username'] ?? '', 'password' => $sourceConfig['password'] ?? '', 'auth_source' => $sourceConfig['auth_source'] ?? 'admin', 'options' => array_merge([ 'connectTimeoutMS' => 10000, 'socketTimeoutMS' => 30000, 'serverSelectionTimeoutMS' => 10000, 'heartbeatFrequencyMS' => 10000, ], $sourceConfig['options'] ?? []), ], 'target' => [ 'host' => $targetConfig['host'], 'port' => $targetConfig['port'], 'username' => $targetConfig['username'] ?? '', 'password' => $targetConfig['password'] ?? '', 'auth_source' => $targetConfig['auth_source'] ?? 'admin', 'options' => array_merge([ 'connectTimeoutMS' => 10000, 'socketTimeoutMS' => 30000, 'serverSelectionTimeoutMS' => 10000, ], $targetConfig['options'] ?? []), ], 'sync' => [ 'databases' => $businessConfig['databases'], 'exclude_databases' => $businessConfig['exclude_databases'], 'exclude_collections' => $businessConfig['exclude_collections'], 'change_stream' => $businessConfig['change_stream'], 'retry' => $businessConfig['retry'], 'performance' => $businessConfig['performance'], ], 'monitoring' => $businessConfig['monitoring'], ]; // 直接传递配置给 DatabaseSyncService 构造函数 return new DatabaseSyncService($syncConfig); } /** * 获取要同步的数据库列表 * * @param array $taskConfig 任务配置 * @return array 数据库名称列表 */ private function getDatabasesToSync(array $taskConfig): array { return $this->syncService->getDatabasesToSync(); } /** * 执行全量同步(支持多进程数据库级并行) * * @param array $databases 数据库列表 * @param array $taskConfig 任务配置 * @return void */ private function performFullSync(array $databases, array $taskConfig): void { // 获取 Worker 信息(用于多进程分配) $workerId = $taskConfig['worker_id'] ?? 0; $workerCount = $taskConfig['worker_count'] ?? 1; // 分配数据库给当前 Worker(负载均衡算法) $assignedDatabases = $this->assignDatabasesToWorker($databases, $workerId, $workerCount); LoggerHelper::logBusiness('database_sync_full_sync_start', [ 'worker_id' => $workerId, 'worker_count' => $workerCount, 'total_databases' => count($databases), 'assigned_databases' => $assignedDatabases, 'assigned_count' => count($assignedDatabases), ]); foreach ($assignedDatabases as $databaseName) { try { $this->syncService->fullSyncDatabase($databaseName); } catch (\Throwable $e) { LoggerHelper::logError($e, [ 'component' => 'DatabaseSyncHandler', 'action' => 'performFullSync', 'database' => $databaseName, 'worker_id' => $workerId, ]); // 继续同步其他数据库 } } LoggerHelper::logBusiness('database_sync_full_sync_completed', [ 'worker_id' => $workerId, 'databases' => $assignedDatabases, ]); } /** * 分配数据库给当前 Worker(负载均衡算法) * * 策略: * 1. 按数据库大小排序(小库优先,提升完成感) * 2. 使用贪心算法:每次分配给当前负载最小的 Worker * 3. 考虑 Worker 当前处理的数据库数量 * * @param array $databases 数据库列表(已按大小排序) * @param int $workerId 当前 Worker ID * @param int $workerCount Worker 总数 * @return array 分配给当前 Worker 的数据库列表 */ private function assignDatabasesToWorker(array $databases, int $workerId, int $workerCount): array { // 如果只有一个 Worker,返回所有数据库 if ($workerCount <= 1) { return $databases; } // 方案A:简单取模分配(快速实现) // 适用于数据库数量较多且大小相近的场景 $assignedDatabases = []; foreach ($databases as $index => $databaseName) { if ($index % $workerCount === $workerId) { $assignedDatabases[] = $databaseName; } } // 方案B:负载均衡分配(推荐,但需要数据库大小信息) // 由于 getDatabasesToSync 已经按大小排序,简单取模即可实现较好的负载均衡 // 如果后续需要更精确的负载均衡,可以从 DatabaseSyncService 获取数据库大小信息 return $assignedDatabases; } /** * 启动增量同步监听(支持多进程数据库级并行) * * @param array $databases 数据库列表 * @param array $taskConfig 任务配置 * @return void */ private function startIncrementalSync(array $databases, array $taskConfig): void { // 获取 Worker 信息(用于多进程分配) $workerId = $taskConfig['worker_id'] ?? 0; $workerCount = $taskConfig['worker_count'] ?? 1; // 分配数据库给当前 Worker(与全量同步使用相同的分配策略) $assignedDatabases = $this->assignDatabasesToWorker($databases, $workerId, $workerCount); LoggerHelper::logBusiness('database_sync_incremental_sync_start', [ 'worker_id' => $workerId, 'worker_count' => $workerCount, 'total_databases' => count($databases), 'assigned_databases' => $assignedDatabases, 'assigned_count' => count($assignedDatabases), ]); // 为分配给当前 Worker 的数据库启动监听(在后台进程中) foreach ($assignedDatabases as $databaseName) { // 使用 Timer 在后台启动监听,避免阻塞 \Workerman\Timer::add(0, function () use ($databaseName) { try { $this->syncService->watchDatabase($databaseName); } catch (\Throwable $e) { LoggerHelper::logError($e, [ 'component' => 'DatabaseSyncHandler', 'action' => 'startIncrementalSync', 'database' => $databaseName, ]); // 重试逻辑(从业务配置中获取) $businessConfig = $this->getBusinessConfig(); $retryConfig = $businessConfig['retry'] ?? []; $maxRetries = $retryConfig['max_connect_retries'] ?? 10; $retryInterval = $retryConfig['retry_interval'] ?? 5; static $retryCount = []; if (!isset($retryCount[$databaseName])) { $retryCount[$databaseName] = 0; } if ($retryCount[$databaseName] < $maxRetries) { $retryCount[$databaseName]++; \Workerman\Timer::add($retryInterval, function () use ($databaseName) { $this->startIncrementalSync([$databaseName], $this->taskConfig); }, [], false); } } }, [], false); } } /** * 启动进度日志定时器 * * @param array $taskConfig 任务配置 * @return void */ private function startProgressTimer(array $taskConfig): void { $businessConfig = $this->getBusinessConfig(); $statsInterval = $businessConfig['monitoring']['stats_interval'] ?? 10; // 默认10秒输出一次进度 // 使用 Workerman Timer 定期输出进度 $this->progressTimerId = \Workerman\Timer::add($statsInterval, function () use ($taskConfig) { try { // 重新加载最新进度(从文件读取) $this->syncService->loadProgress(); $progress = $this->syncService->getProgress(); $stats = $this->syncService->getStats(); // 输出格式化的进度信息 $progressInfo = [ 'task_id' => $taskConfig['task_id'] ?? '', 'task_name' => $taskConfig['name'] ?? '数据库同步', 'status' => $progress['status'], 'progress_percent' => $progress['progress_percent'] . '%', 'current_database' => $progress['current_database'] ?? '无', 'current_collection' => $progress['current_collection'] ?? '无', 'databases' => "{$progress['databases']['completed']}/{$progress['databases']['total']}", 'collections' => "{$progress['collections']['completed']}/{$progress['collections']['total']}", 'documents' => "{$progress['documents']['processed']}/{$progress['documents']['total']}", 'documents_inserted' => $stats['documents_inserted'], 'documents_updated' => $stats['documents_updated'], 'documents_deleted' => $stats['documents_deleted'], 'errors' => $stats['errors'], 'elapsed_time' => round($progress['time']['elapsed_seconds'], 2) . 's', 'estimated_remaining' => $progress['time']['estimated_remaining_seconds'] ? round($progress['time']['estimated_remaining_seconds'], 2) . 's' : '计算中...', ]; // 输出到日志 LoggerHelper::logBusiness('database_sync_progress_report', $progressInfo); // 如果状态是错误,输出错误信息 if ($progress['status'] === 'error' && isset($progress['last_error'])) { LoggerHelper::logBusiness('database_sync_error_info', [ 'error_message' => $progress['last_error']['message'] ?? '未知错误', 'error_database' => $progress['error_database'] ?? '未知', 'error_collection' => $progress['last_error']['collection'] ?? '未知', ]); } } catch (\Throwable $e) { LoggerHelper::logError($e, [ 'component' => 'DatabaseSyncHandler', 'action' => 'startProgressTimer', ]); } }); } /** * 停止进度日志定时器 * * @return void */ public function stopProgressTimer(): void { if ($this->progressTimerId > 0) { \Workerman\Timer::del($this->progressTimerId); $this->progressTimerId = 0; } } }