数据中心同步
This commit is contained in:
@@ -0,0 +1,115 @@
|
||||
<?php
|
||||
|
||||
namespace app\service\DataCollection\Handler;
|
||||
|
||||
use app\repository\DataSourceRepository;
|
||||
use app\service\DataSourceService;
|
||||
use app\utils\LoggerHelper;
|
||||
use app\utils\MongoDBHelper;
|
||||
use MongoDB\Client;
|
||||
|
||||
/**
|
||||
* 数据采集 Handler 基类
|
||||
*
|
||||
* 提供通用的数据采集功能:
|
||||
* - MongoDB 客户端创建
|
||||
* - 数据源配置获取
|
||||
* - 目标数据源连接
|
||||
* - 公共服务实例(IdentifierService、ConsumptionService、StoreService)
|
||||
*/
|
||||
abstract class BaseCollectionHandler
|
||||
{
|
||||
use Trait\DataCollectionHelperTrait;
|
||||
|
||||
protected DataSourceService $dataSourceService;
|
||||
protected \app\service\IdentifierService $identifierService;
|
||||
protected \app\service\ConsumptionService $consumptionService;
|
||||
protected \app\service\StoreService $storeService;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->dataSourceService = new DataSourceService(
|
||||
new DataSourceRepository()
|
||||
);
|
||||
|
||||
// 初始化公共服务(避免在子类中重复实例化)
|
||||
$this->identifierService = new \app\service\IdentifierService(
|
||||
new \app\repository\UserProfileRepository(),
|
||||
new \app\service\UserPhoneService(
|
||||
new \app\repository\UserPhoneRelationRepository()
|
||||
)
|
||||
);
|
||||
|
||||
$this->consumptionService = new \app\service\ConsumptionService(
|
||||
new \app\repository\ConsumptionRecordRepository(),
|
||||
new \app\repository\UserProfileRepository(),
|
||||
$this->identifierService
|
||||
);
|
||||
|
||||
$this->storeService = new \app\service\StoreService(
|
||||
new \app\repository\StoreRepository()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取 MongoDB 客户端
|
||||
*
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return Client MongoDB 客户端实例
|
||||
* @throws \InvalidArgumentException 如果数据源配置不存在
|
||||
*/
|
||||
protected function getMongoClient(array $taskConfig): Client
|
||||
{
|
||||
$dataSourceId = $taskConfig['data_source_id']
|
||||
?? $taskConfig['data_source']
|
||||
?? 'sync_mongodb';
|
||||
|
||||
$dataSourceConfig = $this->dataSourceService->getDataSourceConfigById($dataSourceId);
|
||||
|
||||
if (empty($dataSourceConfig)) {
|
||||
throw new \InvalidArgumentException("数据源配置不存在: {$dataSourceId}");
|
||||
}
|
||||
|
||||
return MongoDBHelper::createClient($dataSourceConfig);
|
||||
}
|
||||
|
||||
/**
|
||||
* 连接到目标数据源
|
||||
*
|
||||
* @param string $targetDataSourceId 目标数据源ID
|
||||
* @param string|null $targetDatabase 目标数据库名(可选,默认使用数据源配置中的数据库)
|
||||
* @return array{client: Client, database: \MongoDB\Database, dbName: string, config: array} 连接信息
|
||||
* @throws \InvalidArgumentException 如果目标数据源配置不存在
|
||||
*/
|
||||
protected function connectToTargetDataSource(
|
||||
string $targetDataSourceId,
|
||||
?string $targetDatabase = null
|
||||
): array {
|
||||
$targetDataSourceConfig = $this->dataSourceService->getDataSourceConfigById($targetDataSourceId);
|
||||
|
||||
if (empty($targetDataSourceConfig)) {
|
||||
throw new \InvalidArgumentException("目标数据源配置不存在: {$targetDataSourceId}");
|
||||
}
|
||||
|
||||
$client = MongoDBHelper::createClient($targetDataSourceConfig);
|
||||
$dbName = $targetDatabase ?? $targetDataSourceConfig['database'] ?? 'ckb';
|
||||
$database = $client->selectDatabase($dbName);
|
||||
|
||||
return [
|
||||
'client' => $client,
|
||||
'database' => $database,
|
||||
'dbName' => $dbName,
|
||||
'config' => $targetDataSourceConfig,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* 采集数据(抽象方法,由子类实现)
|
||||
*
|
||||
* @param mixed $adapter 数据源适配器
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return void
|
||||
*/
|
||||
abstract public function collect($adapter, array $taskConfig): void;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,427 @@
|
||||
<?php
|
||||
|
||||
namespace app\service\DataCollection\Handler;
|
||||
|
||||
use app\service\DataSource\DataSourceAdapterInterface;
|
||||
use app\service\DatabaseSyncService;
|
||||
use app\service\DataSourceService;
|
||||
use app\repository\DataSourceRepository;
|
||||
use app\utils\LoggerHelper;
|
||||
use MongoDB\Client;
|
||||
|
||||
/**
|
||||
* 数据库同步采集处理类
|
||||
*
|
||||
* 职责:
|
||||
* - 从源数据库同步数据到目标数据库
|
||||
* - 支持全量同步和增量同步(Change Streams)
|
||||
* - 处理同步进度和错误恢复
|
||||
*/
|
||||
class DatabaseSyncHandler
|
||||
{
|
||||
private DatabaseSyncService $syncService;
|
||||
private array $taskConfig;
|
||||
private int $progressTimerId = 0; // 进度日志定时器ID
|
||||
|
||||
/**
|
||||
* 采集/同步数据库
|
||||
*
|
||||
* @param DataSourceAdapterInterface $adapter 数据源适配器(源数据库)
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return void
|
||||
*/
|
||||
public function collect(DataSourceAdapterInterface $adapter, array $taskConfig): void
|
||||
{
|
||||
$this->taskConfig = $taskConfig;
|
||||
$taskId = $taskConfig['task_id'] ?? '';
|
||||
$taskName = $taskConfig['name'] ?? '数据库同步';
|
||||
|
||||
LoggerHelper::logBusiness('database_sync_collection_started', [
|
||||
'task_id' => $taskId,
|
||||
'task_name' => $taskName,
|
||||
]);
|
||||
// 控制台直接输出一条提示,方便在启动时观察数据库同步任务是否真正开始执行
|
||||
error_log("[DatabaseSyncHandler] 数据库同步任务已启动:task_id={$taskId}, task_name={$taskName}");
|
||||
|
||||
try {
|
||||
// 创建 DatabaseSyncService(使用任务配置中的源和目标数据源)
|
||||
$this->syncService = $this->createSyncService($taskConfig);
|
||||
|
||||
// 获取要同步的数据库列表
|
||||
$databases = $this->getDatabasesToSync($taskConfig);
|
||||
|
||||
if (empty($databases)) {
|
||||
LoggerHelper::logBusiness('database_sync_no_databases', [
|
||||
'task_id' => $taskId,
|
||||
'message' => '没有找到要同步的数据库',
|
||||
]);
|
||||
return;
|
||||
}
|
||||
|
||||
// 启动进度日志定时器(定期输出同步进度)
|
||||
$this->startProgressTimer($taskConfig);
|
||||
|
||||
// 是否执行全量同步(从业务配置中获取)
|
||||
$businessConfig = $this->getBusinessConfig();
|
||||
$fullSyncEnabled = $businessConfig['change_stream']['full_sync_on_start'] ?? false;
|
||||
|
||||
if ($fullSyncEnabled) {
|
||||
// 执行全量同步
|
||||
$this->performFullSync($databases, $taskConfig);
|
||||
}
|
||||
|
||||
// 启动增量同步监听(Change Streams)
|
||||
$this->startIncrementalSync($databases, $taskConfig);
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
LoggerHelper::logError($e, [
|
||||
'component' => 'DatabaseSyncHandler',
|
||||
'action' => 'collect',
|
||||
'task_id' => $taskId,
|
||||
]);
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取业务配置(从独立配置文件或使用默认值)
|
||||
*
|
||||
* @return array<string, mixed> 业务配置
|
||||
*/
|
||||
private function getBusinessConfig(): array
|
||||
{
|
||||
// 可以从独立配置文件读取,或使用默认值
|
||||
// 这里使用默认值,业务逻辑统一在代码中管理
|
||||
return [
|
||||
// 数据库同步配置
|
||||
'databases' => [], // 空数组表示同步所有数据库
|
||||
'exclude_databases' => ['admin', 'local', 'config'], // 排除的系统数据库
|
||||
'exclude_collections' => ['system.profile', 'system.js'], // 排除的系统集合
|
||||
|
||||
// Change Streams 配置
|
||||
'change_stream' => [
|
||||
'batch_size' => 100,
|
||||
'max_await_time_ms' => 1000,
|
||||
'full_sync_on_start' => true, // 首次启动时是否执行全量同步
|
||||
'full_sync_batch_size' => 1000,
|
||||
],
|
||||
|
||||
// 重试配置
|
||||
'retry' => [
|
||||
'max_connect_retries' => 10,
|
||||
'retry_interval' => 5,
|
||||
'max_sync_retries' => 3,
|
||||
'sync_retry_interval' => 2,
|
||||
],
|
||||
|
||||
// 性能配置
|
||||
'performance' => [
|
||||
'concurrent_databases' => 5,
|
||||
'concurrent_collections' => 10,
|
||||
'batch_write_size' => 5000,
|
||||
// 为了让断点续传逻辑简单可靠,这里关闭集合级并行同步
|
||||
// 后续如果需要再做更复杂的分片断点策略,可以重新打开
|
||||
'enable_parallel_sync' => false,
|
||||
'max_parallel_tasks_per_collection' => 4,
|
||||
'documents_per_task' => 100000,
|
||||
],
|
||||
|
||||
// 监控配置
|
||||
'monitoring' => [
|
||||
'log_sync' => true,
|
||||
'log_detail' => false,
|
||||
'stats_interval' => 10, // 每10秒输出一次进度日志
|
||||
],
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建 DatabaseSyncService 实例
|
||||
*
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return DatabaseSyncService
|
||||
*/
|
||||
private function createSyncService(array $taskConfig): DatabaseSyncService
|
||||
{
|
||||
// 从数据库获取源和目标数据源配置
|
||||
$dataSourceService = new DataSourceService(new DataSourceRepository());
|
||||
$sourceDataSourceId = $taskConfig['source_data_source'] ?? 'kr_mongodb';
|
||||
$targetDataSourceId = $taskConfig['target_data_source'] ?? 'sync_mongodb';
|
||||
|
||||
$sourceConfig = $dataSourceService->getDataSourceConfigById($sourceDataSourceId);
|
||||
$targetConfig = $dataSourceService->getDataSourceConfigById($targetDataSourceId);
|
||||
|
||||
if (empty($sourceConfig) || empty($targetConfig)) {
|
||||
throw new \InvalidArgumentException("数据源配置不存在: source={$sourceDataSourceId}, target={$targetDataSourceId}");
|
||||
}
|
||||
|
||||
// 获取业务配置(统一在代码中管理)
|
||||
$businessConfig = $this->getBusinessConfig();
|
||||
|
||||
// 构建同步配置
|
||||
$syncConfig = [
|
||||
'enabled' => true,
|
||||
'source' => [
|
||||
'host' => $sourceConfig['host'],
|
||||
'port' => $sourceConfig['port'],
|
||||
'username' => $sourceConfig['username'] ?? '',
|
||||
'password' => $sourceConfig['password'] ?? '',
|
||||
'auth_source' => $sourceConfig['auth_source'] ?? 'admin',
|
||||
'options' => array_merge([
|
||||
'connectTimeoutMS' => 10000,
|
||||
'socketTimeoutMS' => 30000,
|
||||
'serverSelectionTimeoutMS' => 10000,
|
||||
'heartbeatFrequencyMS' => 10000,
|
||||
], $sourceConfig['options'] ?? []),
|
||||
],
|
||||
'target' => [
|
||||
'host' => $targetConfig['host'],
|
||||
'port' => $targetConfig['port'],
|
||||
'username' => $targetConfig['username'] ?? '',
|
||||
'password' => $targetConfig['password'] ?? '',
|
||||
'auth_source' => $targetConfig['auth_source'] ?? 'admin',
|
||||
'options' => array_merge([
|
||||
'connectTimeoutMS' => 10000,
|
||||
'socketTimeoutMS' => 30000,
|
||||
'serverSelectionTimeoutMS' => 10000,
|
||||
], $targetConfig['options'] ?? []),
|
||||
],
|
||||
'sync' => [
|
||||
'databases' => $businessConfig['databases'],
|
||||
'exclude_databases' => $businessConfig['exclude_databases'],
|
||||
'exclude_collections' => $businessConfig['exclude_collections'],
|
||||
'change_stream' => $businessConfig['change_stream'],
|
||||
'retry' => $businessConfig['retry'],
|
||||
'performance' => $businessConfig['performance'],
|
||||
],
|
||||
'monitoring' => $businessConfig['monitoring'],
|
||||
];
|
||||
|
||||
// 直接传递配置给 DatabaseSyncService 构造函数
|
||||
return new DatabaseSyncService($syncConfig);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取要同步的数据库列表
|
||||
*
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return array<string> 数据库名称列表
|
||||
*/
|
||||
private function getDatabasesToSync(array $taskConfig): array
|
||||
{
|
||||
return $this->syncService->getDatabasesToSync();
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行全量同步(支持多进程数据库级并行)
|
||||
*
|
||||
* @param array<string> $databases 数据库列表
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return void
|
||||
*/
|
||||
private function performFullSync(array $databases, array $taskConfig): void
|
||||
{
|
||||
// 获取 Worker 信息(用于多进程分配)
|
||||
$workerId = $taskConfig['worker_id'] ?? 0;
|
||||
$workerCount = $taskConfig['worker_count'] ?? 1;
|
||||
|
||||
// 分配数据库给当前 Worker(负载均衡算法)
|
||||
$assignedDatabases = $this->assignDatabasesToWorker($databases, $workerId, $workerCount);
|
||||
|
||||
LoggerHelper::logBusiness('database_sync_full_sync_start', [
|
||||
'worker_id' => $workerId,
|
||||
'worker_count' => $workerCount,
|
||||
'total_databases' => count($databases),
|
||||
'assigned_databases' => $assignedDatabases,
|
||||
'assigned_count' => count($assignedDatabases),
|
||||
]);
|
||||
|
||||
foreach ($assignedDatabases as $databaseName) {
|
||||
try {
|
||||
$this->syncService->fullSyncDatabase($databaseName);
|
||||
} catch (\Throwable $e) {
|
||||
LoggerHelper::logError($e, [
|
||||
'component' => 'DatabaseSyncHandler',
|
||||
'action' => 'performFullSync',
|
||||
'database' => $databaseName,
|
||||
'worker_id' => $workerId,
|
||||
]);
|
||||
// 继续同步其他数据库
|
||||
}
|
||||
}
|
||||
|
||||
LoggerHelper::logBusiness('database_sync_full_sync_completed', [
|
||||
'worker_id' => $workerId,
|
||||
'databases' => $assignedDatabases,
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* 分配数据库给当前 Worker(负载均衡算法)
|
||||
*
|
||||
* 策略:
|
||||
* 1. 按数据库大小排序(小库优先,提升完成感)
|
||||
* 2. 使用贪心算法:每次分配给当前负载最小的 Worker
|
||||
* 3. 考虑 Worker 当前处理的数据库数量
|
||||
*
|
||||
* @param array<string> $databases 数据库列表(已按大小排序)
|
||||
* @param int $workerId 当前 Worker ID
|
||||
* @param int $workerCount Worker 总数
|
||||
* @return array<string> 分配给当前 Worker 的数据库列表
|
||||
*/
|
||||
private function assignDatabasesToWorker(array $databases, int $workerId, int $workerCount): array
|
||||
{
|
||||
// 如果只有一个 Worker,返回所有数据库
|
||||
if ($workerCount <= 1) {
|
||||
return $databases;
|
||||
}
|
||||
|
||||
// 方案A:简单取模分配(快速实现)
|
||||
// 适用于数据库数量较多且大小相近的场景
|
||||
$assignedDatabases = [];
|
||||
foreach ($databases as $index => $databaseName) {
|
||||
if ($index % $workerCount === $workerId) {
|
||||
$assignedDatabases[] = $databaseName;
|
||||
}
|
||||
}
|
||||
|
||||
// 方案B:负载均衡分配(推荐,但需要数据库大小信息)
|
||||
// 由于 getDatabasesToSync 已经按大小排序,简单取模即可实现较好的负载均衡
|
||||
// 如果后续需要更精确的负载均衡,可以从 DatabaseSyncService 获取数据库大小信息
|
||||
|
||||
return $assignedDatabases;
|
||||
}
|
||||
|
||||
/**
|
||||
* 启动增量同步监听(支持多进程数据库级并行)
|
||||
*
|
||||
* @param array<string> $databases 数据库列表
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return void
|
||||
*/
|
||||
private function startIncrementalSync(array $databases, array $taskConfig): void
|
||||
{
|
||||
// 获取 Worker 信息(用于多进程分配)
|
||||
$workerId = $taskConfig['worker_id'] ?? 0;
|
||||
$workerCount = $taskConfig['worker_count'] ?? 1;
|
||||
|
||||
// 分配数据库给当前 Worker(与全量同步使用相同的分配策略)
|
||||
$assignedDatabases = $this->assignDatabasesToWorker($databases, $workerId, $workerCount);
|
||||
|
||||
LoggerHelper::logBusiness('database_sync_incremental_sync_start', [
|
||||
'worker_id' => $workerId,
|
||||
'worker_count' => $workerCount,
|
||||
'total_databases' => count($databases),
|
||||
'assigned_databases' => $assignedDatabases,
|
||||
'assigned_count' => count($assignedDatabases),
|
||||
]);
|
||||
|
||||
// 为分配给当前 Worker 的数据库启动监听(在后台进程中)
|
||||
foreach ($assignedDatabases as $databaseName) {
|
||||
// 使用 Timer 在后台启动监听,避免阻塞
|
||||
\Workerman\Timer::add(0, function () use ($databaseName) {
|
||||
try {
|
||||
$this->syncService->watchDatabase($databaseName);
|
||||
} catch (\Throwable $e) {
|
||||
LoggerHelper::logError($e, [
|
||||
'component' => 'DatabaseSyncHandler',
|
||||
'action' => 'startIncrementalSync',
|
||||
'database' => $databaseName,
|
||||
]);
|
||||
|
||||
// 重试逻辑(从业务配置中获取)
|
||||
$businessConfig = $this->getBusinessConfig();
|
||||
$retryConfig = $businessConfig['retry'] ?? [];
|
||||
$maxRetries = $retryConfig['max_connect_retries'] ?? 10;
|
||||
$retryInterval = $retryConfig['retry_interval'] ?? 5;
|
||||
|
||||
static $retryCount = [];
|
||||
if (!isset($retryCount[$databaseName])) {
|
||||
$retryCount[$databaseName] = 0;
|
||||
}
|
||||
|
||||
if ($retryCount[$databaseName] < $maxRetries) {
|
||||
$retryCount[$databaseName]++;
|
||||
\Workerman\Timer::add($retryInterval, function () use ($databaseName) {
|
||||
$this->startIncrementalSync([$databaseName], $this->taskConfig);
|
||||
}, [], false);
|
||||
}
|
||||
}
|
||||
}, [], false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 启动进度日志定时器
|
||||
*
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return void
|
||||
*/
|
||||
private function startProgressTimer(array $taskConfig): void
|
||||
{
|
||||
$businessConfig = $this->getBusinessConfig();
|
||||
$statsInterval = $businessConfig['monitoring']['stats_interval'] ?? 10; // 默认10秒输出一次进度
|
||||
|
||||
// 使用 Workerman Timer 定期输出进度
|
||||
$this->progressTimerId = \Workerman\Timer::add($statsInterval, function () use ($taskConfig) {
|
||||
try {
|
||||
// 重新加载最新进度(从文件读取)
|
||||
$this->syncService->loadProgress();
|
||||
$progress = $this->syncService->getProgress();
|
||||
$stats = $this->syncService->getStats();
|
||||
|
||||
// 输出格式化的进度信息
|
||||
$progressInfo = [
|
||||
'task_id' => $taskConfig['task_id'] ?? '',
|
||||
'task_name' => $taskConfig['name'] ?? '数据库同步',
|
||||
'status' => $progress['status'],
|
||||
'progress_percent' => $progress['progress_percent'] . '%',
|
||||
'current_database' => $progress['current_database'] ?? '无',
|
||||
'current_collection' => $progress['current_collection'] ?? '无',
|
||||
'databases' => "{$progress['databases']['completed']}/{$progress['databases']['total']}",
|
||||
'collections' => "{$progress['collections']['completed']}/{$progress['collections']['total']}",
|
||||
'documents' => "{$progress['documents']['processed']}/{$progress['documents']['total']}",
|
||||
'documents_inserted' => $stats['documents_inserted'],
|
||||
'documents_updated' => $stats['documents_updated'],
|
||||
'documents_deleted' => $stats['documents_deleted'],
|
||||
'errors' => $stats['errors'],
|
||||
'elapsed_time' => round($progress['time']['elapsed_seconds'], 2) . 's',
|
||||
'estimated_remaining' => $progress['time']['estimated_remaining_seconds']
|
||||
? round($progress['time']['estimated_remaining_seconds'], 2) . 's'
|
||||
: '计算中...',
|
||||
];
|
||||
|
||||
// 输出到日志
|
||||
LoggerHelper::logBusiness('database_sync_progress_report', $progressInfo);
|
||||
|
||||
// 如果状态是错误,输出错误信息
|
||||
if ($progress['status'] === 'error' && isset($progress['last_error'])) {
|
||||
LoggerHelper::logBusiness('database_sync_error_info', [
|
||||
'error_message' => $progress['last_error']['message'] ?? '未知错误',
|
||||
'error_database' => $progress['error_database'] ?? '未知',
|
||||
'error_collection' => $progress['last_error']['collection'] ?? '未知',
|
||||
]);
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
LoggerHelper::logError($e, [
|
||||
'component' => 'DatabaseSyncHandler',
|
||||
'action' => 'startProgressTimer',
|
||||
]);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 停止进度日志定时器
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function stopProgressTimer(): void
|
||||
{
|
||||
if ($this->progressTimerId > 0) {
|
||||
\Workerman\Timer::del($this->progressTimerId);
|
||||
$this->progressTimerId = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
namespace app\service\DataCollection\Handler;
|
||||
|
||||
use app\service\TagTaskService;
|
||||
use app\repository\TagTaskRepository;
|
||||
use app\repository\TagTaskExecutionRepository;
|
||||
use app\repository\UserProfileRepository;
|
||||
use app\service\TagService;
|
||||
use app\repository\TagDefinitionRepository;
|
||||
use app\repository\UserTagRepository;
|
||||
use app\repository\TagHistoryRepository;
|
||||
use app\service\TagRuleEngine\SimpleRuleEngine;
|
||||
use app\utils\LoggerHelper;
|
||||
|
||||
/**
|
||||
* 标签任务处理类
|
||||
*
|
||||
* 职责:
|
||||
* - 执行标签计算任务
|
||||
* - 批量遍历用户数据打标签
|
||||
*/
|
||||
class TagTaskHandler
|
||||
{
|
||||
/**
|
||||
* 执行标签任务
|
||||
*
|
||||
* @param mixed $adapter 数据源适配器(标签任务不需要)
|
||||
* @param array<string, mixed> $taskConfig 任务配置
|
||||
* @return void
|
||||
*/
|
||||
public function collect($adapter, array $taskConfig): void
|
||||
{
|
||||
$taskId = $taskConfig['task_id'] ?? '';
|
||||
$taskName = $taskConfig['name'] ?? '标签任务';
|
||||
|
||||
LoggerHelper::logBusiness('tag_task_handler_started', [
|
||||
'task_id' => $taskId,
|
||||
'task_name' => $taskName,
|
||||
]);
|
||||
|
||||
try {
|
||||
// 创建TagTaskService实例
|
||||
$tagTaskService = new TagTaskService(
|
||||
new TagTaskRepository(),
|
||||
new TagTaskExecutionRepository(),
|
||||
new UserProfileRepository(),
|
||||
new TagService(
|
||||
new TagDefinitionRepository(),
|
||||
new UserProfileRepository(),
|
||||
new UserTagRepository(),
|
||||
new TagHistoryRepository(),
|
||||
new SimpleRuleEngine()
|
||||
)
|
||||
);
|
||||
|
||||
// 执行任务
|
||||
$tagTaskService->executeTask($taskId);
|
||||
|
||||
LoggerHelper::logBusiness('tag_task_handler_completed', [
|
||||
'task_id' => $taskId,
|
||||
'task_name' => $taskName,
|
||||
]);
|
||||
} catch (\Throwable $e) {
|
||||
LoggerHelper::logError($e, [
|
||||
'component' => 'TagTaskHandler',
|
||||
'action' => 'collect',
|
||||
'task_id' => $taskId,
|
||||
]);
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
<?php
|
||||
|
||||
namespace app\service\DataCollection\Handler\Trait;
|
||||
|
||||
use MongoDB\BSON\UTCDateTime;
|
||||
|
||||
/**
|
||||
* 数据采集辅助方法 Trait
|
||||
*
|
||||
* 提供通用的工具方法给各个 Handler 使用
|
||||
*/
|
||||
trait DataCollectionHelperTrait
|
||||
{
|
||||
/**
|
||||
* 将 MongoDB 文档转换为数组
|
||||
*
|
||||
* @param mixed $document MongoDB 文档对象或数组
|
||||
* @return array<string, mixed> 数组格式的数据
|
||||
*/
|
||||
protected function convertMongoDocumentToArray($document): array
|
||||
{
|
||||
if (is_array($document)) {
|
||||
return $document;
|
||||
}
|
||||
|
||||
if (is_object($document) && method_exists($document, 'toArray')) {
|
||||
return $document->toArray();
|
||||
}
|
||||
|
||||
return json_decode(json_encode($document), true) ?? [];
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析日期时间字符串
|
||||
*
|
||||
* @param mixed $dateTimeStr 日期时间字符串或对象
|
||||
* @return \DateTimeImmutable|null 解析后的日期时间对象
|
||||
*/
|
||||
protected function parseDateTime($dateTimeStr): ?\DateTimeImmutable
|
||||
{
|
||||
if (empty($dateTimeStr)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 如果是 MongoDB 的 UTCDateTime 对象
|
||||
if ($dateTimeStr instanceof UTCDateTime) {
|
||||
return \DateTimeImmutable::createFromMutable($dateTimeStr->toDateTime());
|
||||
}
|
||||
|
||||
// 如果是 DateTime 对象
|
||||
if ($dateTimeStr instanceof \DateTime || $dateTimeStr instanceof \DateTimeImmutable) {
|
||||
if ($dateTimeStr instanceof \DateTime) {
|
||||
return \DateTimeImmutable::createFromMutable($dateTimeStr);
|
||||
}
|
||||
return $dateTimeStr;
|
||||
}
|
||||
|
||||
// 尝试解析字符串
|
||||
try {
|
||||
return new \DateTimeImmutable((string)$dateTimeStr);
|
||||
} catch (\Exception $e) {
|
||||
\app\utils\LoggerHelper::logBusiness('datetime_parse_failed', [
|
||||
'input' => $dateTimeStr,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析金额
|
||||
*
|
||||
* @param mixed $amount 金额字符串或数字
|
||||
* @return float 解析后的金额
|
||||
*/
|
||||
protected function parseAmount($amount): float
|
||||
{
|
||||
if (is_numeric($amount)) {
|
||||
return (float)$amount;
|
||||
}
|
||||
|
||||
if (is_string($amount)) {
|
||||
// 移除所有非数字字符(除了小数点)
|
||||
$cleaned = preg_replace('/[^\d.]/', '', $amount);
|
||||
return (float)$cleaned;
|
||||
}
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 过滤手机号中的非数字字符
|
||||
*
|
||||
* @param string $phoneNumber 原始手机号
|
||||
* @return string 过滤后的手机号(只包含数字)
|
||||
*/
|
||||
protected function filterPhoneNumber(string $phoneNumber): string
|
||||
{
|
||||
// 移除所有非数字字符
|
||||
return preg_replace('/\D/', '', $phoneNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证手机号格式
|
||||
*
|
||||
* @param string $phone 手机号(已经过滤过非数字字符)
|
||||
* @return bool 是否有效(11位数字,1开头)
|
||||
*/
|
||||
protected function isValidPhone(string $phone): bool
|
||||
{
|
||||
// 如果为空,直接返回 false
|
||||
if (empty($phone)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 中国大陆手机号:11位数字,以1开头
|
||||
return preg_match('/^1[3-9]\d{9}$/', $phone) === 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据消费时间生成月份集合名
|
||||
*
|
||||
* @param string $baseCollectionName 基础集合名
|
||||
* @param mixed $dateTimeStr 日期时间字符串或对象
|
||||
* @return string 带月份后缀的集合名(如:consumption_records_202512)
|
||||
*/
|
||||
protected function getMonthlyCollectionName(string $baseCollectionName, $dateTimeStr = null): string
|
||||
{
|
||||
$consumeTime = $this->parseDateTime($dateTimeStr);
|
||||
if ($consumeTime === null) {
|
||||
$consumeTime = new \DateTimeImmutable();
|
||||
}
|
||||
$monthSuffix = $consumeTime->format('Ym');
|
||||
return "{$baseCollectionName}_{$monthSuffix}";
|
||||
}
|
||||
|
||||
/**
|
||||
* 转换为 MongoDB UTCDateTime
|
||||
*
|
||||
* @param mixed $dateTimeStr 日期时间字符串或对象
|
||||
* @return UTCDateTime|null MongoDB UTCDateTime 对象
|
||||
*/
|
||||
protected function convertToUTCDateTime($dateTimeStr): ?UTCDateTime
|
||||
{
|
||||
if (empty($dateTimeStr)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 如果已经是 UTCDateTime,直接返回
|
||||
if ($dateTimeStr instanceof UTCDateTime) {
|
||||
return $dateTimeStr;
|
||||
}
|
||||
|
||||
// 如果是 DateTime 对象
|
||||
if ($dateTimeStr instanceof \DateTime || $dateTimeStr instanceof \DateTimeImmutable) {
|
||||
return new UTCDateTime($dateTimeStr->getTimestamp() * 1000);
|
||||
}
|
||||
|
||||
// 尝试解析字符串
|
||||
try {
|
||||
$dateTime = new \DateTimeImmutable((string)$dateTimeStr);
|
||||
return new UTCDateTime($dateTime->getTimestamp() * 1000);
|
||||
} catch (\Exception $e) {
|
||||
\app\utils\LoggerHelper::logBusiness('convert_to_utcdatetime_failed', [
|
||||
'input' => $dateTimeStr,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user