修复内存溢出问题

This commit is contained in:
靳中伟 2025-10-19 15:33:54 +08:00
parent 23b7b99dc6
commit 3f578c03e8
21 changed files with 15003 additions and 1005 deletions

View File

@ -245,10 +245,10 @@ class BaseConfig(BaseSettings):
DB_NAME: str = Field(default=_db_config['database'], env="DB_NAME")
DB_CHARSET: str = Field(default=_db_config['charset'], env="DB_CHARSET")
DB_ECHO: bool = False # 是否输出SQL语句
DB_POOL_SIZE: int = 50 # 增加连接池基础大小
DB_MAX_OVERFLOW: int = 100 # 增加溢出连接数
DB_POOL_RECYCLE: int = 1800 # 减少连接回收时间,防止连接过期
DB_POOL_TIMEOUT: int = 60 # 获取连接的超时时间
DB_POOL_SIZE: int = 10 # 连接池基础大小优化从50降到10节省内存
DB_MAX_OVERFLOW: int = 20 # 溢出连接数优化从100降到20节省内存
DB_POOL_RECYCLE: int = 1800 # 连接回收时间30分钟,防止连接过期
DB_POOL_TIMEOUT: int = 30 # 获取连接的超时时间优化从60降到30秒
DB_POOL_PRE_PING: bool = True # 连接前检测连接可用性
# Redis配置

View File

@ -62,7 +62,7 @@ TASK_TYPE_AREA={
"LG": "AGW/PL"
}
# 从环境变量读取配置,或使用默认值
TF_API_BASE_URL = os.getenv("TF_API_BASE_URL", "http://192.168.189.206:8080/jeecg-boot")
TF_API_BASE_URL = os.getenv("TF_API_BASE_URL", "http://192.168.189.187:8080/jeecg-boot")
# TF_API_BASE_URL = os.getenv("TF_API_BASE_URL", "http://111.231.146.230:4080/jeecg-boot")
TF_API_TIMEOUT = int(os.getenv("TF_API_TIMEOUT", "10")) # 减少超时时间从60秒到10秒
TF_API_RETRY_TIMES = int(os.getenv("TF_API_RETRY_TIMES", "3"))

View File

@ -0,0 +1,89 @@
# 选择机器人块输出参数立即更新说明
## 修改背景
选择机器人块CSelectAgvBp和机器人执行动作块CAgvOperationBp是嵌套关系。之前的实现中虽然选择机器人块在选择完成后状态就显示为成功但是输出内容选择出的机器人结果要等到整个嵌套的动作块执行完成后才会更新到表中在任务记录详情里才能显示出来。这不符合实际的显示需求。
## 修改内容
### 1. 修改 `RobotBlockHandler.update_block_record` 方法
**文件**: `services/execution/handlers/robot_scheduling.py`
**修改点**:
- 增加 `block_name``output_data` 参数
- 在选择机器人成功后,立即更新块记录的输出参数到数据库
- 构建完整的输出结构:`{"blocks": {"块名称": {"selectedAgvId": "机器人名称"}}}`
- 同时更新 `output_params``block_out_params_value` 字段
### 2. 修改选择机器人块的调用逻辑
**文件**: `services/execution/handlers/robot_scheduling.py`
**修改点**:
- 在选择机器人成功后第897-902行立即调用 `update_block_record` 方法
- 传入块名称和输出数据(包含选择的机器人名称)
- 这样用户可以立即在任务记录详情中看到选择的机器人
## 执行流程
### 修改前的流程
1. 选择机器人块开始执行
2. 调用天风系统接口选择机器人
3. 等待机器人选择完成
4. 更新块记录状态为成功(但不更新输出参数)
5. 执行嵌套的动作块
6. **等待所有子块执行完成**
7. **最后才更新输出参数到数据库**
8. 用户才能看到选择的机器人
### 修改后的流程
1. 选择机器人块开始执行
2. 调用天风系统接口选择机器人
3. 等待机器人选择完成
4. **立即更新块记录状态为成功,并更新输出参数到数据库**
5. **用户立即可以在任务记录详情中看到选择的机器人**
6. 执行嵌套的动作块
7. 所有子块执行完成后,再次更新输出参数(内容相同,不影响显示)
## 技术细节
### 输出参数格式
```json
{
"blocks": {
"块名称": {
"selectedAgvId": "机器人名称"
}
}
}
```
### 数据库字段
- `output_params`: 完整的输出结构(包含 blocks 层级)
- `block_out_params_value`: 输出参数值(只包含选择的机器人信息)
## 注意事项
1. **输出参数会被更新两次**:第一次在选择机器人完成时立即更新,第二次在所有子块执行完成后再次更新。两次更新的内容相同,不会造成问题。
2. **不影响子块执行**:立即更新输出参数不会影响子块的执行逻辑,子块仍然可以正常访问 context 中的机器人信息。
3. **兼容性**这个修改不会影响其他类型的块只针对选择机器人块CSelectAgvBp
## 测试建议
1. 启动一个包含选择机器人块和嵌套动作块的任务
2. 在选择机器人完成后,立即查询任务记录详情
3. 验证能否看到选择的机器人信息
4. 等待动作块执行完成后,再次查询任务记录详情
5. 验证机器人信息保持一致
## 相关文件
- `services/execution/handlers/robot_scheduling.py` - 选择机器人块处理器
- `services/execution/block_executor.py` - 块执行器
- `data/models/blockrecord.py` - 块记录数据模型
## 修改日期
2025-10-17

File diff suppressed because it is too large Load Diff

6247
logs/app.log.2025-10-16 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -9,9 +9,10 @@
import json
import asyncio
import aiohttp
from typing import Dict, Any
from typing import Dict, Any, Set, Optional
from fastapi import APIRouter, Body, Request, Path
from routes.model.external_task_model import ExternalTaskRequest, ExternalTaskResponse, TaskTypeEnum, GenAgvSchedulingTaskRequest, CancelTaskRequest
from routes.model.external_task_model import ExternalTaskRequest, ExternalTaskResponse, TaskTypeEnum, \
GenAgvSchedulingTaskRequest, CancelTaskRequest
from routes.model.task_edit_model import TaskEditRunRequest, TaskInputParamNew, InputParamType
from services.task_edit_service import TaskEditService
from services.external_task_record_service import ExternalTaskRecordService
@ -19,10 +20,10 @@ from services.task_record_service import TaskRecordService
from services.sync_service import set_task_terminated, get_login_token, refresh_token_if_needed
from routes.common_api import format_response, error_response
from utils.logger import get_logger
from utils.background_task_manager import create_background_task
from data.enum.task_record_enum import SourceType, TaskStatus
from data.models.external_task_record import ExternalTaskStatusEnum
from config.tf_api_config import TF_API_TOKEN, TF_API_BASE_URL, CM_ID, DG_ID, TASK_TYPE_PRIORITY, TASK_TYPE_AREA, TF_WEB_POST, sync_disabled_label
from config.tf_api_config import TF_API_TOKEN, TF_API_BASE_URL, CM_ID, DG_ID, TASK_TYPE_PRIORITY, TASK_TYPE_AREA, \
TF_WEB_POST, sync_disabled_label
# 创建路由
router = APIRouter(
@ -33,15 +34,130 @@ router = APIRouter(
# 设置日志
logger = get_logger("app.external_task_api")
# 后台监控任务集合,用于追踪和清理
_background_monitor_tasks: Set[asyncio.Task] = set()
# 全局HTTP会话用于复用连接池
_global_http_session: Optional[aiohttp.ClientSession] = None
_session_lock = asyncio.Lock() # 确保线程安全
async def get_http_session() -> aiohttp.ClientSession:
    """
    Return the global HTTP session, creating it on first use.

    A single shared ``aiohttp.ClientSession`` with a bounded connection
    pool is reused across all outbound calls, which reduces memory usage
    and the number of open TCP connections (the memory-leak fix this
    module exists for). Creation is guarded by ``_session_lock`` so
    concurrent callers cannot create duplicate sessions.

    Returns:
        aiohttp.ClientSession: the shared session object.
    """
    global _global_http_session
    async with _session_lock:
        # Recreate the session if it was never created or was closed
        # (e.g. after close_http_session() during shutdown/restart).
        if _global_http_session is None or _global_http_session.closed:
            # Connector = the connection pool configuration.
            connector = aiohttp.TCPConnector(
                limit=100,                  # total simultaneous connections
                limit_per_host=30,          # per-host connection cap
                ttl_dns_cache=300,          # DNS cache lifetime (seconds)
                force_close=False,          # keep-alive: allow connection reuse
                enable_cleanup_closed=True  # reap half-closed transports
            )
            # Default timeouts applied to every request made on this session.
            timeout = aiohttp.ClientTimeout(
                total=60,      # whole-request deadline (seconds)
                connect=10,    # TCP connect deadline
                sock_read=30   # per-read deadline
            )
            _global_http_session = aiohttp.ClientSession(
                connector=connector,
                timeout=timeout,
                raise_for_status=False  # callers inspect response.status themselves
            )
            logger.info("创建全局HTTP会话启用连接池复用")
    return _global_http_session
async def close_http_session() -> None:
    """
    Close the global HTTP session (call once on application shutdown).

    Fix: acquire ``_session_lock`` for the whole check/close/rebind
    sequence. The original closed and re-assigned ``_global_http_session``
    without the lock, so a concurrent ``get_http_session()`` could observe
    a half-closed session or recreate one mid-close.
    """
    global _global_http_session
    async with _session_lock:
        if _global_http_session and not _global_http_session.closed:
            await _global_http_session.close()
            logger.info("已关闭全局HTTP会话")
            _global_http_session = None
            # Brief grace period so the underlying SSL/TCP transports can
            # finish closing before the event loop shuts down (aiohttp
            # recommends this pattern on session close).
            await asyncio.sleep(0.25)
def _cleanup_background_task(task: asyncio.Task) -> None:
    """
    Done-callback attached to every background monitor task.

    Removes the finished task from the tracking set (so the set cannot
    grow without bound) and logs how the task ended: cancelled, failed
    with an exception, or completed normally.

    Args:
        task: the finished asyncio task being reaped.
    """
    _background_monitor_tasks.discard(task)
    remaining = len(_background_monitor_tasks)
    try:
        if task.cancelled():
            logger.debug(f"后台监控任务已取消,已清理。当前活跃任务数: {remaining}")
            return
        exc = task.exception()
        if exc:
            logger.error(
                f"后台监控任务异常结束: {exc},已清理。当前活跃任务数: {remaining}")
        else:
            logger.debug(f"后台监控任务正常完成,已清理。当前活跃任务数: {remaining}")
    except Exception as e:
        # Never let a done-callback raise back into the event loop.
        logger.error(f"清理后台任务时出错: {str(e)}")
def get_active_monitor_tasks_count() -> int:
    """
    Report how many background monitor tasks are currently being tracked.

    Exposed for the monitoring/debug endpoint; reads the module-level
    tracking set without mutating it.

    Returns:
        int: number of active (not yet reaped) monitor tasks.
    """
    return len(_background_monitor_tasks)
async def cancel_all_monitor_tasks() -> int:
    """
    Cancel every tracked background monitor task (application shutdown only).

    Snapshots the tracking set (so done-callbacks removing entries during
    iteration cannot interfere), requests cancellation of every task that
    is still running, then waits for all of them to settle.

    Returns:
        int: number of tasks that were tracked when cancellation started.
    """
    # Snapshot to avoid mutating the set while iterating it.
    pending = list(_background_monitor_tasks)
    count = len(pending)
    if not pending:
        return count
    logger.info(f"正在取消 {count} 个活跃的监控任务...")
    for t in pending:
        if not t.done():
            t.cancel()
    # Wait for every task to acknowledge cancellation; return_exceptions
    # swallows both CancelledError and any task exceptions here.
    await asyncio.gather(*pending, return_exceptions=True)
    logger.info(f"已取消所有监控任务,共 {count} 个")
    return count
# 外部回调接口URL
EXTERNAL_CALLBACK_URL = "http://roh.vwfawedl.mobi:9001/AGVService/ContainerSendBackRequest" # 生产线到毛坯库任务
AGV_GOODS_MOVE_URL = "http://roh.vwfawedl.mobi:9001/AGVService/HUGoodsMoveRequest" # 毛坯库到产线任务
EXTERNAL_CALLBACK_URL = "http://roh.vwfawedl.mobi:9001/AGVService/ContainerSendBackRequest" # 生产线到毛坯库任务
AGV_GOODS_MOVE_URL = "http://roh.vwfawedl.mobi:9001/AGVService/HUGoodsMoveRequest" # 毛坯库到产线任务
async def get_tf_api_token() -> str:
"""
获取TF API Token优先使用动态获取的token失败时使用默认值
Returns:
str: 可用的API token
"""
@ -53,7 +169,7 @@ async def get_tf_api_token() -> str:
return token
except Exception as e:
logger.warning(f"获取动态token失败: {str(e)}")
# 如果获取失败使用配置中的默认token
logger.info("使用默认配置中的token")
return TF_API_TOKEN
@ -61,7 +177,7 @@ async def get_tf_api_token() -> str:
async def call_external_callback(arrival_no: str, arrival_user: str = "000307") -> bool:
"""
调用外部回调接口
调用外部回调接口使用全局Session复用连接池
Args:
arrival_no: 到货编号ReqCode
@ -80,23 +196,24 @@ async def call_external_callback(arrival_no: str, arrival_user: str = "000307")
while retry_count < max_retries:
try:
async with aiohttp.ClientSession() as session:
async with session.post(EXTERNAL_CALLBACK_URL, json=payload) as response:
result = await response.json()
logger.info(f"外部接口调用响应: {result}, arrival_no={arrival_no}, 重试次数={retry_count}")
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.post(EXTERNAL_CALLBACK_URL, json=payload) as response:
result = await response.json()
logger.info(f"外部接口调用响应: {result}, arrival_no={arrival_no}, 重试次数={retry_count}")
# 检查响应结果
if result.get("result") == "0":
logger.info(f"外部接口调用成功: arrival_no={arrival_no}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"外部接口返回result=1继续重试: arrival_no={arrival_no}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"外部接口返回异常结果: {result}, arrival_no={arrival_no}")
retry_count += 1
await asyncio.sleep(5)
# 检查响应结果
if result.get("result") == "0":
logger.info(f"外部接口调用成功: arrival_no={arrival_no}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"外部接口返回result=1继续重试: arrival_no={arrival_no}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"外部接口返回异常结果: {result}, arrival_no={arrival_no}")
retry_count += 1
await asyncio.sleep(5)
except Exception as e:
logger.error(f"调用外部接口异常: {str(e)}, arrival_no={arrival_no}, 重试次数={retry_count}")
@ -106,9 +223,10 @@ async def call_external_callback(arrival_no: str, arrival_user: str = "000307")
logger.error(f"外部接口调用失败,已达到最大重试次数: arrival_no={arrival_no}, 最大重试次数={max_retries}")
return False
async def call_agv_goods_move_callback(pid: str, user_id: str = "000307") -> bool:
"""
调用AGV货物移动回调接口
调用AGV货物移动回调接口使用全局Session复用连接池
Args:
pid: 对应的req_code
@ -127,23 +245,24 @@ async def call_agv_goods_move_callback(pid: str, user_id: str = "000307") -> boo
while retry_count < max_retries:
try:
async with aiohttp.ClientSession() as session:
async with session.post(AGV_GOODS_MOVE_URL, json=payload) as response:
result = await response.json()
logger.info(f"AGV货物移动接口调用响应: {result}, PID={pid}, 重试次数={retry_count}")
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.post(AGV_GOODS_MOVE_URL, json=payload) as response:
result = await response.json()
logger.info(f"AGV货物移动接口调用响应: {result}, PID={pid}, 重试次数={retry_count}")
# 检查响应结果
if result.get("result") == "0":
logger.info(f"AGV货物移动接口调用成功: PID={pid}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"AGV货物移动接口返回result=1继续重试: PID={pid}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"AGV货物移动接口返回异常结果: {result}, PID={pid}")
retry_count += 1
await asyncio.sleep(5)
# 检查响应结果
if result.get("result") == "0":
logger.info(f"AGV货物移动接口调用成功: PID={pid}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"AGV货物移动接口返回result=1继续重试: PID={pid}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"AGV货物移动接口返回异常结果: {result}, PID={pid}")
retry_count += 1
await asyncio.sleep(5)
except Exception as e:
logger.error(f"调用AGV货物移动接口异常: {str(e)}, PID={pid}, 重试次数={retry_count}")
@ -153,6 +272,7 @@ async def call_agv_goods_move_callback(pid: str, user_id: str = "000307") -> boo
logger.error(f"AGV货物移动接口调用失败已达到最大重试次数: PID={pid}, 最大重试次数={max_retries}")
return False
async def monitor_task_and_callback(task_record_id: str, req_code: str):
"""
异步监控任务状态并在成功时调用外部回调接口
@ -163,50 +283,54 @@ async def monitor_task_and_callback(task_record_id: str, req_code: str):
"""
logger.info(f"开始监控任务状态: task_record_id={task_record_id}, req_code={req_code}")
# max_wait_time = 1800 # 最大等待时间30分钟
# wait_count = 0
try:
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
logger.info(f"监控任务状态: task_record_id={task_record_id}, status={task_status}")
logger.info(f"监控任务状态: task_record_id={task_record_id}, status={task_status}")
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(
f"任务执行成功,开始调用外部回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用外部回调接口
success = await call_external_callback(req_code)
if success:
logger.info(f"外部回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
else:
logger.error(f"外部回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(f"任务执行成功,开始调用外部回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用外部回调接口
success = await call_external_callback(req_code)
if success:
logger.info(f"外部回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(
f"任务执行失败或取消,不调用外部回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.error(f"外部回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
logger.debug(f"任务仍在执行中,继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2)
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(f"任务执行失败或取消,不调用外部回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.debug(f"任务仍在执行中,继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2) # 等待10秒
# wait_count += 10
logger.warning(f"无法获取任务详情,继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2)
else:
logger.warning(f"无法获取任务详情,继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待10秒
# wait_count += 10
except asyncio.CancelledError:
logger.info(f"监控任务被取消: task_record_id={task_record_id}, req_code={req_code}")
raise # 重新抛出以便正确处理取消
except Exception as e:
logger.error(f"监控任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2)
finally:
# 确保释放所有资源
logger.info(f"监控任务结束,清理资源: task_record_id={task_record_id}, req_code={req_code}")
except Exception as e:
logger.error(f"监控任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待10秒
# wait_count += 10
async def monitor_agv_task_and_callback(task_record_id: str, req_code: str):
"""
@ -218,55 +342,68 @@ async def monitor_agv_task_and_callback(task_record_id: str, req_code: str):
"""
logger.info(f"开始监控AGV调度任务状态: task_record_id={task_record_id}, req_code={req_code}")
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
try:
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
logger.info(f"监控AGV调度任务状态: task_record_id={task_record_id}, status={task_status}")
logger.info(f"监控AGV调度任务状态: task_record_id={task_record_id}, status={task_status}")
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(f"AGV调度任务执行成功开始调用AGV货物移动回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用AGV货物移动回调接口
success = await call_agv_goods_move_callback(req_code)
if success:
logger.info(f"AGV货物移动回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(
f"AGV调度任务执行成功开始调用AGV货物移动回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用AGV货物移动回调接口
success = await call_agv_goods_move_callback(req_code)
if success:
logger.info(
f"AGV货物移动回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
else:
logger.error(
f"AGV货物移动回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(
f"AGV调度任务执行失败或取消不调用AGV货物移动回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.error(f"AGV货物移动回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
logger.debug(
f"AGV调度任务仍在执行中继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2)
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(f"AGV调度任务执行失败或取消不调用AGV货物移动回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.debug(f"AGV调度任务仍在执行中继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2) # 等待2秒
logger.warning(f"无法获取AGV调度任务详情继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2)
else:
logger.warning(f"无法获取AGV调度任务详情继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待2秒
except asyncio.CancelledError:
logger.info(f"AGV监控任务被取消: task_record_id={task_record_id}, req_code={req_code}")
raise # 重新抛出以便正确处理取消
except Exception as e:
logger.error(f"监控AGV调度任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2)
finally:
# 确保释放所有资源
logger.info(f"AGV监控任务结束清理资源: task_record_id={task_record_id}, req_code={req_code}")
except Exception as e:
logger.error(f"监控AGV调度任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待2秒
async def check_task_permission(tf_api_token: str, tf_api_base_url: str, module_name: str = "其他") -> bool:
"""
检查是否允许处理任务
检查是否允许处理任务使用全局Session复用连接池
调用参数配置-三方接口调用接口检查系统限制
Args:
tf_api_token: API访问令牌
tf_api_base_url: API基础URL
module_name: 模块名称默认为"其他"
Returns:
bool: True表示允许处理任务False表示被限制
"""
@ -274,52 +411,54 @@ async def check_task_permission(tf_api_token: str, tf_api_base_url: str, module_
"X-Access-Token": tf_api_token,
"Content-Type": "text/plain"
}
# 构建 API URL
api_url = f"{tf_api_base_url}/parameter/getByModule"
try:
async with aiohttp.ClientSession() as session:
async with session.get(api_url, data=module_name, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"参数配置接口调用成功: result={result}")
# 检查响应格式
if result.get("success", False):
parameter_result = result.get("result", {})
sync_disabled = parameter_result.get(sync_disabled_label, "false")
# 如果 sync_disabled 为 "true",则被限制
if sync_disabled == "true":
logger.warning("系统限制创建任务: sync_disabled=true")
return False
else:
logger.info("系统允许创建任务: sync_disabled=false")
return True
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.get(api_url, data=module_name, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"参数配置接口调用成功: result={result}")
# 检查响应格式
if result.get("success", False):
parameter_result = result.get("result", {})
sync_disabled = parameter_result.get(sync_disabled_label, "false")
# 如果 sync_disabled 为 "true",则被限制
if sync_disabled == "true":
logger.warning("系统限制创建任务: sync_disabled=true")
return False
else:
# 如果接口调用失败,默认允许处理任务
logger.warning(f"参数配置接口调用失败: {result.get('message', '未知错误')}")
logger.info("系统允许创建任务: sync_disabled=false")
return True
else:
logger.error(f"参数配置接口调用失败: status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
# 如果接口调用失败,默认允许处理任务
logger.warning(f"参数配置接口调用失败: {result.get('message', '未知错误')}")
return True
else:
logger.error(f"参数配置接口调用失败: status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
# 如果接口调用失败,默认允许处理任务
return True
except Exception as e:
logger.error(f"系统接口服务异常: error={str(e)}")
# 如果出现异常,默认允许处理任务
return False
async def get_amr_loading_state(task_record_id: str, tf_api_token: str) -> Dict[str, Any]:
"""
获取任务中小车负载状态
获取任务中小车负载状态使用全局Session复用连接池
Args:
task_record_id: 天风任务ID
tf_api_token: API访问令牌
Returns:
Dict[str, Any]: 包含小车负载状态的响应数据
"""
@ -327,26 +466,27 @@ async def get_amr_loading_state(task_record_id: str, tf_api_token: str) -> Dict[
"X-Access-Token": tf_api_token,
"Content-Type": "application/json"
}
# 构建 API URL
api_url = f"{TF_API_BASE_URL}/task/vwedtask/{task_record_id}/getAmrState"
try:
async with aiohttp.ClientSession() as session:
async with session.get(api_url, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"获取小车负载状态成功: task_record_id={task_record_id}, result={result}")
return result
else:
logger.error(f"获取小车负载状态失败: task_record_id={task_record_id}, status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
return {
"success": False,
"message": f"HTTP {response.status}: {response_text}",
"code": response.status
}
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.get(api_url, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"获取小车负载状态成功: task_record_id={task_record_id}, result={result}")
return result
else:
logger.error(f"获取小车负载状态失败: task_record_id={task_record_id}, status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
return {
"success": False,
"message": f"HTTP {response.status}: {response_text}",
"code": response.status
}
except Exception as e:
logger.error(f"获取小车负载状态异常: task_record_id={task_record_id}, error={str(e)}")
return {
@ -355,6 +495,7 @@ async def get_amr_loading_state(task_record_id: str, tf_api_token: str) -> Dict[
"code": 500
}
# # 任务类型到任务优先级
TASK_TYPE_TEMPLATE_MAPPING = {
TaskTypeEnum.GG2MP: "GG",
@ -393,6 +534,8 @@ TASK_TYPE_REMARK = {
TaskTypeEnum.MP2LG: "毛坯库:{0}-连杆:{1}",
TaskTypeEnum.MP2PHZ: "毛坯库:{0}-平衡轴:{1}",
}
@router.post("/newTask")
async def create_new_task(request: Request, task_request: ExternalTaskRequest = Body(...)):
"""
@ -539,7 +682,7 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
source_device=request.client.host if request.client else "unknown", # 使用客户端IP作为设备标识
use_modbus=False,
modbus_timeout=5000,
priority = priority
priority=priority
)
# 更新外部任务记录状态为运行中
@ -612,7 +755,7 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
message="成功",
rowCount=1
)
except Exception as e:
logger.error(f"创建外部任务异常: {str(e)}, ReqCode={task_request.ReqCode}")
# 如果已创建外部任务记录,更新状态为失败
@ -628,7 +771,7 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
)
except Exception as update_error:
logger.error(f"更新外部任务记录状态失败: {str(update_error)}")
return ExternalTaskResponse(
code=500,
reqCode=task_request.ReqCode,
@ -636,12 +779,13 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
rowCount=0
)
@router.post("/GenAgvSchedulingTask")
async def gen_agv_scheduling_task(request: Request, task_request: GenAgvSchedulingTaskRequest = Body(...)):
"""
AGV调度任务接口
用于生成AGV调度任务
逻辑
1. 根据 taskcode 参数查询 external_task_record 表获取对应的 task_record_id
2. 调用 get_task_record_detail 接口查询任务运行状态
@ -657,11 +801,10 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
priority = TASK_TYPE_PRIORITY.get(TASK_TYPE_TEMPLATE_MAPPING.get(task_request.TaskTyp, "OR"))
remark = TASK_TYPE_REMARK.get(task_request.TaskTyp)
external_record = None
try:
logger.info(f"收到AGV调度任务请求:{task_request}")
# 检查系统是否允许处理任务
tf_api_token = await get_tf_api_token()
is_allowed = await check_task_permission(tf_api_token, TF_API_BASE_URL)
@ -673,12 +816,12 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
message="由于系统限制创建任务失败,请联系管理员",
rowCount=0
)
# 导入数据库相关模块
from data.session import get_async_session
from data.models.operate_point_layer import OperatePointLayer
from sqlalchemy import select
# 验证任务条件已移至脚本处理器中,此处保留简单检查
if not task_request.TaskCode:
return ExternalTaskResponse(
@ -698,16 +841,15 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
}
client_info_str = json.dumps(client_info, ensure_ascii=False)
# 根据任务类型获取对应的模板ID
template_id = DG_ID
# 创建外部任务记录
external_record = await ExternalTaskRecordService.create_agv_scheduling_task_record(
req_code=task_request.ReqCode,
task_code=task_request.TaskCode,
business_task_type=task_request.TaskTyp.value if hasattr(task_request.TaskTyp, 'value') else str(task_request.TaskTyp),
business_task_type=task_request.TaskTyp.value if hasattr(task_request.TaskTyp, 'value') else str(
task_request.TaskTyp),
security_key=task_request.SecurityKey or "",
type_field=task_request.Type,
sub_type=task_request.SubType,
@ -778,7 +920,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
task_params.append(TaskInputParamNew(
name="priority",
type=InputParamType.STRING,
label="优先级",
label="优先级",
required=False,
defaultValue=priority,
remark="选车优先级"
@ -786,7 +928,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
task_params.append(TaskInputParamNew(
name="TASK_CODE",
type=InputParamType.STRING,
label="任务id",
label="任务id",
required=False,
defaultValue=task_request.TaskCode,
remark="创建任务时任务id"
@ -794,7 +936,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
task_params.append(TaskInputParamNew(
name="TASK_TYPE",
type=InputParamType.STRING,
label="任务类型",
label="任务类型",
required=False,
defaultValue=task_request.TaskTyp,
remark="创建任务时任务类型"
@ -816,7 +958,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
source_device=request.client.host if request.client else "unknown", # 使用客户端IP作为设备标识
use_modbus=False,
modbus_timeout=5000,
priority = priority
priority=priority
)
# 更新外部任务记录状态为运行中
@ -885,22 +1027,25 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
)
logger.info(f"AGV调度任务启动成功: ReqCode={task_request.ReqCode}, TaskRecordId={task_record_id}")
# 定义需要监控的任务类型
agv_callback_task_types = ["MP2GG", "MP2GT", "MP2ZG", "MP2QZ", "MP2LG", "MP2PHZ"]
# 启动异步任务监控,不阻塞当前接口 - 使用后台任务管理器
# 启动异步任务监控,不阻塞当前接口
if task_record_id and task_request.TaskTyp in agv_callback_task_types and TF_WEB_POST:
create_background_task(
monitor_agv_task_and_callback(
task_record_id=task_record_id,
req_code=task_request.TaskCode
),
name=f"monitor_agv_task_{task_record_id}",
context=f"TaskType={task_request.TaskTyp}, ReqCode={task_request.TaskCode}"
)
logger.info(f"已启动AGV调度任务监控: TaskType={task_request.TaskTyp}, TaskRecordId={task_record_id}")
# 创建后台监控任务并添加到管理集合
monitor_task = asyncio.create_task(monitor_agv_task_and_callback(
task_record_id=task_record_id,
req_code=task_request.TaskCode
))
# 将任务添加到集合中进行追踪
_background_monitor_tasks.add(monitor_task)
# 添加完成回调,自动清理
monitor_task.add_done_callback(_cleanup_background_task)
logger.info(
f"已启动AGV调度任务监控: TaskType={task_request.TaskTyp}, TaskRecordId={task_record_id}, 当前活跃监控任务数: {len(_background_monitor_tasks)}")
return ExternalTaskResponse(
code=0,
reqCode=task_request.TaskCode,
@ -923,7 +1068,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
)
except Exception as update_error:
logger.error(f"更新外部任务记录状态失败: {str(update_error)}")
return ExternalTaskResponse(
code=500,
reqCode=task_request.ReqCode,
@ -931,21 +1076,22 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
rowCount=0
)
@router.post("/cancelTask")
async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body(...)):
"""
取消任务接口
根据ReqCode查询对应的task_record_id然后调用内部接口终止任务并通知主系统
Args:
cancel_request: 取消任务请求包含ReqCode
Returns:
ExternalTaskResponse: 包含codereqCodemessagerowCount的响应
"""
try:
logger.info(f"收到取消任务请求: {cancel_request}")
# 检查系统是否允许处理任务
tf_api_token = await get_tf_api_token()
is_allowed = await check_task_permission(tf_api_token, TF_API_BASE_URL)
@ -957,9 +1103,9 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="由于系统限制创建任务失败,请联系管理员",
rowCount=0
)
req_code = cancel_request.ReqCode
# 根据req_code查询external_task_record获取task_record_id
external_record = await ExternalTaskRecordService.get_external_task_record(req_code)
if not external_record:
@ -970,7 +1116,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="未找到对应的任务记录",
rowCount=0
)
task_record_id = external_record.task_record_id
if not task_record_id:
logger.error(f"外部任务记录中没有关联的task_record_id: ReqCode={req_code}")
@ -980,7 +1126,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="任务记录中没有关联的内部任务ID",
rowCount=0
)
# 通过task_record_id查询任务详情检查任务状态
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
if not task_detail_result.get("success", False):
@ -991,10 +1137,10 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="获取任务详情失败",
rowCount=0
)
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
# 检查任务状态,只有运行状态的任务才允许取消
if task_status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.warning(f"任务已处于终止状态,无法取消: ReqCode={req_code}, TaskStatus={task_status}")
@ -1004,18 +1150,19 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message=f"任务已处于{task_status}状态,无法取消",
rowCount=0
)
# 检查小车负载状态
logger.info(f"检查小车负载状态: task_record_id={task_record_id}")
amr_state_result = await get_amr_loading_state(task_record_id, tf_api_token)
if amr_state_result.get("success", False):
amr_state_data = amr_state_result.get("result", {})
amr_loading = amr_state_data.get("amr_loading", False)
amr_name = amr_state_data.get("amr_name", "")
logger.info(f"小车负载状态: task_record_id={task_record_id}, amr_loading={amr_loading}, amr_name={amr_name}")
logger.info(
f"小车负载状态: task_record_id={task_record_id}, amr_loading={amr_loading}, amr_name={amr_name}")
# 如果小车处于负载状态,不允许取消任务
if amr_loading:
logger.warning(f"小车处于负载状态,不允许终止任务: ReqCode={req_code}, AMR={amr_name}")
@ -1028,15 +1175,15 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
else:
# 如果获取小车状态失败,记录警告但继续执行取消操作
logger.warning(f"获取小车负载状态失败,继续执行取消操作: {amr_state_result.get('message', '')}")
# 调用内部接口停止任务
logger.info(f"调用内部接口停止任务: task_record_id={task_record_id}")
stop_result = await TaskRecordService.stop_task_record(task_record_id)
if not stop_result.get("success", False):
error_msg = stop_result.get("message", "停止任务失败")
logger.error(f"停止任务失败: {error_msg}, task_record_id={task_record_id}")
# 检查是否是"已载货,请人工干预"的情况
if "已载货" in error_msg or "人工干预" in error_msg:
return ExternalTaskResponse(
@ -1045,23 +1192,23 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="已载货,请人工干预",
rowCount=0
)
return ExternalTaskResponse(
code=1,
reqCode=req_code,
message=error_msg,
rowCount=0
)
# 通知主系统任务已终止
logger.info(f"通知主系统任务已终止: task_record_id={task_record_id}")
try:
await set_task_terminated(task_record_id, tf_api_token)
logger.info(f"成功通知主系统任务已终止: task_record_id={task_record_id}")
except Exception as sync_error:
logger.warning(f"通知主系统失败,但任务已成功取消: {str(sync_error)}, task_record_id={task_record_id}")
# 更新外部任务记录状态为已取消
await ExternalTaskRecordService.update_task_record_status(
req_code=external_record.id,
@ -1070,7 +1217,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
response_message="任务已取消",
response_row_count=0
)
logger.info(f"任务取消成功: ReqCode={req_code}, TaskRecordId={task_record_id}")
return ExternalTaskResponse(
code=0,
@ -1078,7 +1225,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="成功",
rowCount=0
)
except Exception as e:
logger.error(f"取消任务异常: {str(e)}, ReqCode={cancel_request.ReqCode}")
return ExternalTaskResponse(
@ -1091,27 +1238,27 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
@router.get("/api/external-task-record/by-req-code/{req_code}")
async def get_external_task_record_by_req_code(
req_code: str = Path(..., description="请求标识码")
req_code: str = Path(..., description="请求标识码")
):
"""
根据ReqCode查询外部任务记录
Args:
req_code: 请求标识码
Returns:
包含外部任务记录信息的响应
"""
try:
# 查询外部任务记录
external_record = await ExternalTaskRecordService.get_external_task_record(req_code)
if not external_record:
return error_response(
message=f"未找到ReqCode为 {req_code} 的外部任务记录",
code=404
)
# 构建响应数据
response_data = {
"id": external_record.id,
@ -1133,15 +1280,42 @@ async def get_external_task_record_by_req_code(
"created_at": external_record.created_at.isoformat() if external_record.created_at else None,
"updated_at": external_record.updated_at.isoformat() if external_record.updated_at else None
}
return format_response(
data=response_data,
message="成功获取外部任务记录"
)
except Exception as e:
logger.error(f"查询外部任务记录异常: {str(e)}, req_code={req_code}")
return error_response(
message=f"查询外部任务记录失败: {str(e)}",
code=500
)
@router.get("/api/monitor-tasks/status")
async def get_monitor_tasks_status():
    """
    Report the status of background monitor tasks (debug/observability endpoint).

    Returns:
        A formatted response carrying the number of currently active
        background monitor tasks, or an error response on failure.
    """
    try:
        count = get_active_monitor_tasks_count()
        payload = {
            "activeMonitorTasks": count,
            "message": f"当前有 {count} 个活跃的后台监控任务",
        }
        return format_response(data=payload, message="成功获取监控任务状态")
    except Exception as exc:
        logger.error(f"获取监控任务状态异常: {str(exc)}")
        return error_response(
            message=f"获取监控任务状态失败: {str(exc)}",
            code=500
        )

View File

@ -975,26 +975,46 @@ class EnhancedTaskScheduler:
source_client_info=source_client_info
)
def _cleanup_executor(self, executor: Optional['TaskExecutor'], context: str = "") -> None:
    """
    Release resources held by an executor to prevent memory leaks.

    Args:
        executor: The task executor to clean up; ``None`` is a no-op.
        context: Free-form text prepended to log lines for traceability.
    """
    if executor is None:
        return
    try:
        cleanup = getattr(executor, 'cleanup', None)
        if callable(cleanup):
            cleanup()
        logger.debug(f"{context} executor已清理")
    except Exception as e:
        # Cleanup is best-effort; never let it propagate into the worker loop.
        logger.error(f"{context} 清理executor异常: {str(e)}")
async def _worker(self, worker_id: int) -> None:
"""
工作线程
从队列中获取任务并执行
Args:
worker_id: 工作线程ID
"""
logger.info(f"工作线程 {worker_id} 启动")
while self.is_running:
executor = None # 在外层定义,确保异常处理时可访问
task_record_id = None
queue_index = -1
cancel_checker_task = None
try:
# 从队列获取任务
queue_index, item = await self.queue_manager.dequeue(worker_id, self.worker_manager.get_worker_count())
# 如果没有任务,继续等待
if queue_index == -1 or item is None:
await asyncio.sleep(0.1) # 短暂休眠
continue
# 解析优先级和任务ID
if isinstance(item, tuple) and len(item) == 2:
priority, task_record_id = item
@ -1004,43 +1024,35 @@ class EnhancedTaskScheduler:
# 兼容旧格式
task_record_id = item
priority = PeriodicTaskStatus.PERIODIC
# 更新工作线程状态
self.worker_manager.update_worker_status(worker_id, {
"current_task": task_record_id,
"task_priority": priority,
"task_start_time": datetime.now()
})
logger.info(f"工作线程 {worker_id} 获取到任务: {task_record_id}, 优先级: {priority}")
# 执行任务
# 创建任务执行器
executor = TaskExecutor(task_record_id)
# 设置超时时间
executor.set_timeout(self.task_timeout)
# 记录到正在执行的任务
self.running_tasks[task_record_id] = executor
# 从持久化管理器中移除(正在执行的任务不需要持久化)
await self.persistence_manager.remove_task(task_record_id)
# 创建一个取消任务检查器,定期检查数据库中任务是否被标记为取消
cancel_checker_task = asyncio.create_task(self._check_task_cancel(task_record_id, executor))
try:
# 创建任务执行器
executor = TaskExecutor(task_record_id)
# 设置超时时间
executor.set_timeout(self.task_timeout)
# 记录到正在执行的任务
self.running_tasks[task_record_id] = executor
# 从持久化管理器中移除(正在执行的任务不需要持久化)
await self.persistence_manager.remove_task(task_record_id)
# 创建一个取消任务检查器,定期检查数据库中任务是否被标记为取消
cancel_checker_task = asyncio.create_task(self._check_task_cancel(task_record_id, executor))
# 执行任务
result = await executor.execute()
# 取消检查器任务
cancel_checker_task.cancel()
try:
await cancel_checker_task
except asyncio.CancelledError:
pass
# 更新工作线程状态
self.worker_manager.update_worker_status(worker_id, {
"current_task": None,
@ -1049,62 +1061,12 @@ class EnhancedTaskScheduler:
"task_count": self.worker_manager.worker_status[worker_id].get("task_count", 0) + 1
})
# 显式清理executor对象释放内存
try:
if executor:
# 清理TaskContext
if hasattr(executor, 'task_context') and executor.task_context:
if hasattr(executor.task_context, 'cleanup'):
executor.task_context.cleanup()
executor.task_context = None
# 清理BlockExecutor
if hasattr(executor, 'block_executor') and executor.block_executor:
executor.block_executor.task_context = None
executor.block_executor = None
# 清空其他引用
executor.task_record = None
executor.task_def = None
logger.debug(f"任务 {task_record_id} 的executor已清理")
except Exception as cleanup_error:
logger.error(f"清理executor失败: {str(cleanup_error)}")
# 移除正在执行的任务
self.running_tasks.pop(task_record_id, None)
logger.info(f"工作线程 {worker_id} 完成任务: {task_record_id}, 结果: {result.get('success')}")
except Exception as e:
logger.error(f"工作线程 {worker_id} 执行任务异常: {str(e)}")
logger.error(traceback.format_exc())
# 显式清理executor对象释放内存
try:
if executor:
# 清理TaskContext
if hasattr(executor, 'task_context') and executor.task_context:
if hasattr(executor.task_context, 'cleanup'):
executor.task_context.cleanup()
executor.task_context = None
# 清理BlockExecutor
if hasattr(executor, 'block_executor') and executor.block_executor:
executor.block_executor.task_context = None
executor.block_executor = None
# 清空其他引用
executor.task_record = None
executor.task_def = None
logger.debug(f"任务 {task_record_id} 的executor已清理(异常分支)")
except Exception as cleanup_error:
logger.error(f"清理executor失败(异常分支): {str(cleanup_error)}")
# 移除正在执行的任务
self.running_tasks.pop(task_record_id, None)
# 检查是否需要重试
await self._handle_task_error(task_record_id, str(e))
@ -1116,30 +1078,54 @@ class EnhancedTaskScheduler:
"last_error": str(e),
"task_count": self.worker_manager.worker_status[worker_id].get("task_count", 0) + 1
})
finally:
# 统一清理逻辑,无论成功或失败都会执行
# 取消检查器任务
if cancel_checker_task:
cancel_checker_task.cancel()
try:
await cancel_checker_task
except asyncio.CancelledError:
pass
# 移除正在执行的任务
if task_record_id:
self.running_tasks.pop(task_record_id, None)
# 清理executor对象防止内存泄漏
self._cleanup_executor(executor, f"工作线程 {worker_id}")
executor = None
# 标记任务完成
self.queue_manager.task_done(queue_index)
if queue_index != -1:
self.queue_manager.task_done(queue_index)
# 更新工作线程心跳
self.worker_manager.update_worker_heartbeat(worker_id)
except asyncio.TimeoutError:
# 超时,继续下一次循环
continue
except asyncio.CancelledError:
# 取消异常,退出循环
logger.info(f"工作线程 {worker_id} 被取消")
# 清理可能存在的executor
self._cleanup_executor(executor, f"工作线程 {worker_id} 取消时")
break
except Exception as e:
logger.error(f"工作线程 {worker_id} 异常: {str(e)}")
logger.error(traceback.format_exc())
# 清理可能存在的executor
self._cleanup_executor(executor, f"工作线程 {worker_id} 异常时")
# 更新工作线程状态
self.worker_manager.update_worker_status(worker_id, {
"error": str(e),
"error_time": datetime.now()
})
# 短暂休眠,避免频繁错误
await asyncio.sleep(1.0)

View File

@ -36,13 +36,14 @@ class BlockExecutor:
def __init__(self, task_context: TaskContext):
"""
初始化块执行器
Args:
task_context: 任务上下文
"""
self.task_context = task_context
self.is_canceled = False
self.is_error = False
self._is_cleaned = False # 防止重复清理标志
def cancel(self) -> None:
"""
@ -168,12 +169,24 @@ class BlockExecutor:
return result
else:
# 更新块状态为失败
error_msg = result.get("message", "执行失败")
await self._update_block_record(block_record_id, TaskBlockRecordStatus.FAILED, error_msg)
# 检查是否是"自身成功但子块失败"的特殊情况
if result.get("self_success", False):
# 本块执行成功,更新状态为成功
success_msg = result.get("message", "执行成功")
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.SUCCESS,
success_msg,
result.get("output", {})
)
logger.info(f"{block_name} 自身执行成功,但子块失败")
else:
# 更新块状态为失败
error_msg = result.get("message", "执行失败")
await self._update_block_record(block_record_id, TaskBlockRecordStatus.FAILED, error_msg)
# 设置错误信息
self.task_context.set_error(error_msg, block_id)
# 设置错误信息(无论哪种情况,任务都要标记为失败)
self.task_context.set_error(result.get("message", "执行失败"), block_id)
return result
except Exception as e:
@ -448,13 +461,27 @@ class BlockExecutor:
}
else:
logger.error(f"子块 {child_id} 执行失败: {result.get('message', '未知错误')}")
# 更新块记录状态为失败
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.FAILED, # 失败
result.get("message", "执行失败"),
)
# 检查是否是"自身成功但子块失败"的特殊情况
if result.get("self_success", False):
# 本块执行成功,更新状态为成功
success_msg = result.get("message", "执行成功")
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.SUCCESS,
success_msg,
result.get("output", {})
)
logger.info(f"子块 {child_name} 自身执行成功,但其子块失败")
else:
# 更新块记录状态为失败
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.FAILED, # 失败
result.get("message", "执行失败"),
)
# 无论哪种情况,都需要终止后续兄弟块的执行
# 为剩余未执行的兄弟块创建终止记录
await self._create_terminated_records_for_remaining_siblings(
children, i + 1, f"因前序块 {child_name} 执行失败而终止"
@ -1833,4 +1860,31 @@ class BlockExecutor:
params = params_str.split(",")
return params
return params
def cleanup(self) -> None:
    """
    Release resources held by this BlockExecutor.

    Drops the reference to ``task_context`` to break reference cycles that
    would otherwise leak memory. ``task_context.cleanup()`` is deliberately
    NOT called here — the owning TaskExecutor is responsible for that.
    Idempotent: only the first call has any effect.
    """
    if self._is_cleaned:
        return
    try:
        # Only sever the reference; the TaskExecutor owns context cleanup.
        self.task_context = None
        # Reset state flags and mark this instance as cleaned.
        self.is_canceled = False
        self.is_error = False
        self._is_cleaned = True
        logger.debug("BlockExecutor 已清理")
    except Exception as e:
        logger.error(f"清理 BlockExecutor 失败: {str(e)}")

View File

@ -341,14 +341,15 @@ class RobotBlockHandler(BlockHandler):
logger.info(f"成功更新任务记录 {task_record_id} 的agv_id字段: {final_agv_id}")
except Exception as e:
logger.error(f"更新任务记录 {task_record_id} 的agv_id字段时发生错误: {str(e)}")
async def update_block_record(self, block_record_id: str, agv_id: str = None) -> None:
async def update_block_record(self, block_record_id: str, agv_id: str = None, block_name: str = None, output_data: Dict[str, Any] = None) -> None:
"""
更新块记录的通用方法
更新块记录的通用方法立即更新输出参数
Args:
block_record_id: 块记录ID
status: 状态码
message: 消息
agv_id: 选择的机器人名称
block_name: 块名称
output_data: 输出数据
"""
try:
from sqlalchemy.ext.asyncio import AsyncSession
@ -356,18 +357,54 @@ class RobotBlockHandler(BlockHandler):
from data.models.blockrecord import VWEDBlockRecord
from sqlalchemy import select, update
from data.enum.task_block_record_enum import TaskBlockRecordStatus
from data.enum.task_input_param_enum import TaskInputParamVariables
from datetime import datetime
if not block_record_id:
logger.warning(f"未提供块记录ID无法更新块记录")
return
stmt = update(VWEDBlockRecord).where(VWEDBlockRecord.id == block_record_id).values(
status=TaskBlockRecordStatus.SUCCESS,
ended_reason="执行成功",
remark="执行成功"
)
async with get_async_session() as session:
session: AsyncSession = session
# 获取块记录以获取块名称
stmt_select = select(VWEDBlockRecord).where(VWEDBlockRecord.id == block_record_id)
result = await session.execute(stmt_select)
block_record = result.scalar_one_or_none()
if not block_record:
logger.warning(f"未找到块记录: {block_record_id}")
return
# 使用传入的块名称或从记录中获取
actual_block_name = block_name or block_record.block_name
# 构建输出参数
update_values = {
"status": TaskBlockRecordStatus.SUCCESS,
"ended_reason": "选择机器人成功",
"remark": "选择机器人成功"
}
# 如果提供了输出数据,立即更新输出参数
if output_data:
# 构建完整输出结构 {"blocks": {"块名称": 输出内容}}
full_output = {TaskInputParamVariables.BLOCKS: {actual_block_name: output_data}}
output_full_json = json.dumps(full_output, ensure_ascii=False)
output_value_json = json.dumps(output_data, ensure_ascii=False)
update_values["output_params"] = output_full_json
update_values["block_out_params_value"] = output_value_json
logger.info(f"立即更新块 {actual_block_name} 的输出参数: {output_data}")
# 更新块记录
stmt = update(VWEDBlockRecord).where(VWEDBlockRecord.id == block_record_id).values(**update_values)
await session.execute(stmt)
await session.commit()
logger.info(f"成功更新块记录 {block_record_id} 的状态和输出参数")
except Exception as e:
logger.error(f"更新块记录 {block_record_id} 时发生错误: {str(e)}")
@ -840,20 +877,29 @@ class SelectAgvBlockHandler(RobotBlockHandler):
}
await self._record_task_log(block, result, context)
return result
# 获取当前块ID和名称提前获取以便更新块记录
current_block_id = block.get("id", "unknown")
current_block_name = block.get("name", f"b{current_block_id}")
# 构建输出数据
output_data = {
"selectedAgvId": amr_name,
}
results = {
"success": True,
"message": f"选择机器人块成功, 块id{current_block_name}",
"output": {
"selectedAgvId": amr_name,
}
"output": output_data
}
await self._record_task_log(block, results, context)
# 更新块记录状态为成功
await self.update_block_record(context.block_record_id, amr_name)
# 获取当前块ID和名称
current_block_id = block.get("id", "unknown")
current_block_name = block.get("name", f"b{current_block_id}")
# 立即更新块记录状态为成功,并更新输出参数
await self.update_block_record(
block_record_id=context.block_record_id,
agv_id=amr_name,
block_name=current_block_name,
output_data=output_data
)
# 更新任务记录中的agv_id字段
await self._update_task_record_agv_id(context.task_record_id, amr_name)
@ -926,7 +972,7 @@ class SelectAgvBlockHandler(RobotBlockHandler):
if "子块" not in result["message"]:
result["message"] = f"{result['message']},子块执行成功, 块id{current_block_name}"
else:
# 子块执行失败,根据失败的子块更新消息
# 子块执行失败,但选择机器人本身是成功的
logger.error(f"选择机器人块 {current_block_name} 的子块执行失败: {loop_result.get('message')}")
# 创建包含子块失败信息的结果
@ -935,6 +981,8 @@ class SelectAgvBlockHandler(RobotBlockHandler):
result = {
"success": False,
"self_success": True, # 标记:自身执行成功,但子块失败
"children_failed": True, # 标记:子块执行失败
"message": f"选择执行机器人成功 选择小车:{amr_name},但子块执行失败: {error_msg}失败块ID: {failed_block_id}",
"output": {
"selectedAgvId": amr_name,

View File

@ -20,6 +20,7 @@ from config.settings import settings
logger = get_logger("services.execution.handlers.storage_queue_manager")
class RequestPriority(Enum):
"""请求优先级"""
LOW = 1
@ -27,6 +28,7 @@ class RequestPriority(Enum):
HIGH = 3
URGENT = 4
class RequestStatus(Enum):
"""请求状态"""
PENDING = "pending"
@ -36,6 +38,7 @@ class RequestStatus(Enum):
TIMEOUT = "timeout"
CANCELLED = "cancelled"
@dataclass
class StorageRequest:
"""库位请求"""
@ -47,46 +50,49 @@ class StorageRequest:
task_record_id: str
priority: RequestPriority = RequestPriority.NORMAL
created_at: float = field(default_factory=time.time)
timeout: float = field(default_factory=lambda: settings.STORAGE_QUEUE_DEFAULT_TIMEOUT if settings.STORAGE_QUEUE_ENABLE_TIMEOUT else float('inf'))
timeout: float = field(
default_factory=lambda: settings.STORAGE_QUEUE_DEFAULT_TIMEOUT if settings.STORAGE_QUEUE_ENABLE_TIMEOUT else float(
'inf'))
retry_count: int = 0
max_retries: int = 3
status: RequestStatus = RequestStatus.PENDING
result: Optional[Dict[str, Any]] = None
error_message: Optional[str] = None
def __lt__(self, other):
    """Heap ordering: higher priority first; ties broken FIFO by creation time."""
    if self.priority.value == other.priority.value:
        return self.created_at < other.created_at
    return self.priority.value > other.priority.value
class StorageQueueManager:
"""库位请求队列管理器"""
def __init__(self, max_workers: int = settings.STORAGE_QUEUE_MAX_WORKERS, max_queue_size: int = None):
self.max_workers = max_workers or settings.STORAGE_QUEUE_MAX_WORKERS
self.max_queue_size = max_queue_size or settings.STORAGE_QUEUE_MAX_SIZE
self.enable_timeout = settings.STORAGE_QUEUE_ENABLE_TIMEOUT
self.cleanup_interval = settings.STORAGE_QUEUE_CLEANUP_INTERVAL
self.completed_request_ttl = settings.STORAGE_QUEUE_COMPLETED_REQUEST_TTL
# 优先级队列
self.request_queue: List[StorageRequest] = []
self.queue_lock = asyncio.Lock()
# 请求跟踪
self.pending_requests: Dict[str, StorageRequest] = {}
self.processing_requests: Dict[str, StorageRequest] = {}
self.completed_requests: Dict[str, StorageRequest] = {}
# 工作者管理
self.workers: List[asyncio.Task] = []
self.worker_semaphore = asyncio.Semaphore(max_workers)
self.shutdown_event = asyncio.Event()
# 处理器注册
self.handlers: Dict[str, Callable] = {}
# 统计信息
self.stats = {
'requests_total': 0,
@ -97,35 +103,35 @@ class StorageQueueManager:
'queue_size': 0,
'active_workers': 0
}
# 启动清理任务
self.cleanup_task = None
async def start(self):
    """Start the queue manager: spawn worker coroutines and the cleanup loop."""
    logger.info("启动库位请求队列管理器")
    # Spawn one worker task per configured slot.
    self.workers.extend(
        asyncio.create_task(self._worker(f"worker-{idx}"))
        for idx in range(self.max_workers)
    )
    # Background task that garbage-collects stale completed requests.
    self.cleanup_task = asyncio.create_task(self._cleanup_completed_requests())
    logger.info(f"队列管理器已启动,工作者数量: {self.max_workers}")
async def stop(self):
"""停止队列管理器"""
logger.info("停止库位请求队列管理器")
# 设置停止信号
self.shutdown_event.set()
# 等待所有工作者完成
if self.workers:
await asyncio.gather(*self.workers, return_exceptions=True)
# 停止清理任务
if self.cleanup_task:
self.cleanup_task.cancel()
@ -133,31 +139,31 @@ class StorageQueueManager:
await self.cleanup_task
except asyncio.CancelledError:
pass
logger.info("队列管理器已停止")
def register_handler(self, handler_type: str, handler_func: Callable) -> None:
    """Register a handler function under the given type key.

    Args:
        handler_type: Key that incoming requests use to select this handler.
        handler_func: Callable invoked to process matching requests.
    """
    self.handlers[handler_type] = handler_func
    logger.info(f"注册处理器: {handler_type}")
async def submit_request(self, request: StorageRequest) -> str:
    """
    Submit a request to the priority queue.

    Args:
        request: The storage request to enqueue.

    Returns:
        The request id of the enqueued request.

    Raises:
        Exception: If the queue is already at its maximum size.
    """
    async with self.queue_lock:
        # Fix: the capacity check must happen under the lock. Checking it
        # before acquiring the lock (as before) lets concurrent submitters
        # race past the limit (time-of-check/time-of-use).
        if len(self.request_queue) >= self.max_queue_size:
            raise Exception("队列已满,无法提交新请求")
        # Add to the priority heap and the pending tracker.
        heapq.heappush(self.request_queue, request)
        self.pending_requests[request.request_id] = request
        # Update statistics.
        self.stats['requests_total'] += 1
        self.stats['queue_size'] = len(self.request_queue)
        logger.debug(f"提交请求到队列: {request.request_id}, 优先级: {request.priority.name}")
    return request.request_id
async def get_request_status(self, request_id: str) -> Optional[Dict[str, Any]]:
"""获取请求状态"""
# 检查各个状态的请求
@ -187,13 +193,13 @@ class StorageQueueManager:
"error_message": request.error_message,
"processing_time": time.time() - request.created_at
}
return None
async def wait_for_result(self, request_id: str, timeout: float = None) -> Dict[str, Any]:
"""等待请求结果"""
start_time = time.time()
# 根据配置决定是否使用超时
if not self.enable_timeout:
# 禁用超时,无限等待
@ -203,64 +209,79 @@ class StorageQueueManager:
# 启用超时,使用提供的超时时间或默认值
check_timeout = timeout or settings.STORAGE_QUEUE_DEFAULT_TIMEOUT
logger.debug(f"等待请求结果(超时 {check_timeout}s: {request_id}")
while True:
# 检查是否完成
if request_id in self.completed_requests:
request = self.completed_requests[request_id]
if request.status == RequestStatus.COMPLETED:
return request.result
else:
raise Exception(f"请求失败: {request.error_message}")
# 只有在启用超时时才检查超时
if self.enable_timeout:
# 检查等待超时
if time.time() - start_time >= check_timeout:
await self._mark_request_timeout(request_id)
raise Exception("等待结果超时")
# 检查请求本身是否超时
if request_id in self.pending_requests:
request = self.pending_requests[request_id]
if request.timeout != float('inf') and time.time() - request.created_at > request.timeout:
try:
while True:
# 检查是否完成
if request_id in self.completed_requests:
request = self.completed_requests[request_id]
if request.status == RequestStatus.COMPLETED:
result = request.result
# 立即清理已完成的请求,避免内存积累
del self.completed_requests[request_id]
logger.debug(f"请求结果已取走并清理: {request_id}")
return result
else:
error_msg = request.error_message
# 失败的请求也立即清理
del self.completed_requests[request_id]
logger.debug(f"请求失败结果已取走并清理: {request_id}")
raise Exception(f"请求失败: {error_msg}")
# 只有在启用超时时才检查超时
if self.enable_timeout:
# 检查等待超时
if time.time() - start_time >= check_timeout:
await self._mark_request_timeout(request_id)
raise Exception("请求超时")
await asyncio.sleep(0.1) # 避免忙等待
raise Exception("等待结果超时")
# 检查请求本身是否超时
if request_id in self.pending_requests:
request = self.pending_requests[request_id]
if request.timeout != float('inf') and time.time() - request.created_at > request.timeout:
await self._mark_request_timeout(request_id)
raise Exception("请求超时")
await asyncio.sleep(0.1) # 避免忙等待
except Exception:
# 发生异常时,也要尝试清理可能存在的已完成请求
if request_id in self.completed_requests:
del self.completed_requests[request_id]
logger.debug(f"异常时清理请求: {request_id}")
raise
async def cancel_request(self, request_id: str) -> bool:
    """
    Cancel a pending request.

    Only requests still waiting in the queue can be cancelled; requests
    already being processed are left alone.

    Returns:
        True if the request was found and cancelled, False otherwise.
    """
    async with self.queue_lock:
        request = self.pending_requests.get(request_id)
        if request is None:
            return False
        request.status = RequestStatus.CANCELLED
        # Rebuild the heap without the cancelled entry.
        self.request_queue = [r for r in self.request_queue if r.request_id != request_id]
        heapq.heapify(self.request_queue)
        # Move the request from pending to completed bookkeeping.
        del self.pending_requests[request_id]
        self.completed_requests[request_id] = request
        self.stats['queue_size'] = len(self.request_queue)
        logger.info(f"取消请求: {request_id}")
        return True
def get_queue_stats(self) -> Dict[str, Any]:
    """Return a snapshot of queue statistics (a copy; safe for callers to mutate)."""
    self.stats['queue_size'] = len(self.request_queue)
    active = sum(1 for worker in self.workers if not worker.done())
    self.stats['active_workers'] = active
    return dict(self.stats)
async def _worker(self, worker_name: str):
"""工作者协程"""
logger.info(f"工作者 {worker_name} 启动")
while not self.shutdown_event.is_set():
try:
# 获取请求
@ -268,104 +289,108 @@ class StorageQueueManager:
if not request:
await asyncio.sleep(0.1)
continue
# 处理请求
async with self.worker_semaphore:
await self._process_request(request, worker_name)
except Exception as e:
logger.error(f"工作者 {worker_name} 处理请求异常: {str(e)}")
await asyncio.sleep(1)
logger.info(f"工作者 {worker_name} 停止")
async def _get_next_request(self) -> Optional[StorageRequest]:
"""获取下一个请求"""
async with self.queue_lock:
while self.request_queue:
request = heapq.heappop(self.request_queue)
# 检查请求是否仍然有效
if request.request_id in self.pending_requests:
# 只有在启用超时且请求设置了有限超时时才检查超时
if (self.enable_timeout and
request.timeout != float('inf') and
time.time() - request.created_at > request.timeout):
if (self.enable_timeout and
request.timeout != float('inf') and
time.time() - request.created_at > request.timeout):
await self._mark_request_timeout(request.request_id)
continue
# 移动到处理队列
del self.pending_requests[request.request_id]
self.processing_requests[request.request_id] = request
request.status = RequestStatus.PROCESSING
self.stats['queue_size'] = len(self.request_queue)
return request
return None
async def _process_request(self, request: StorageRequest, worker_name: str):
"""处理单个请求"""
start_time = time.time()
logger.debug(f"工作者 {worker_name} 开始处理请求: {request.request_id}")
try:
# 获取处理器
handler = self.handlers.get(request.handler_type)
if not handler:
raise Exception(f"未找到处理器: {request.handler_type}")
# 执行处理
result = await handler(request.input_params, request.context_data, request.map_id, request.task_record_id)
# 标记完成
await self._mark_request_completed(request.request_id, result)
processing_time = time.time() - start_time
logger.debug(f"工作者 {worker_name} 完成请求: {request.request_id}, 用时: {processing_time:.3f}s")
except Exception as e:
# 处理失败
error_msg = str(e)
logger.error(f"工作者 {worker_name} 处理请求失败: {request.request_id}, 错误: {error_msg}")
# 检查是否需要重试
if request.retry_count < request.max_retries:
await self._retry_request(request)
else:
await self._mark_request_failed(request.request_id, error_msg)
finally:
# 更新平均处理时间
# 更新平均处理时间(只在这里统计,移除 _mark_request_completed 中的重复计数)
processing_time = time.time() - start_time
total_time = self.stats['avg_processing_time'] * self.stats['requests_completed']
self.stats['requests_completed'] += 1
self.stats['avg_processing_time'] = (total_time + processing_time) / self.stats['requests_completed']
if self.stats['requests_completed'] > 0:
total_time = self.stats['avg_processing_time'] * self.stats['requests_completed']
self.stats['avg_processing_time'] = (total_time + processing_time) / (
self.stats['requests_completed'] + 1)
else:
self.stats['avg_processing_time'] = processing_time
async def _mark_request_completed(self, request_id: str, result: Dict[str, Any]):
    """Move a processing request to the completed map with its result."""
    request = self.processing_requests.pop(request_id, None)
    if request is None:
        return
    request.status = RequestStatus.COMPLETED
    request.result = result
    self.completed_requests[request_id] = request
    # Completion counter is bumped here only, so the timing bookkeeping
    # in _process_request does not double-count.
    self.stats['requests_completed'] += 1
async def _mark_request_failed(self, request_id: str, error_message: str):
    """Move a processing request to the completed map, marked as failed."""
    request = self.processing_requests.pop(request_id, None)
    if request is None:
        return
    request.status = RequestStatus.FAILED
    request.error_message = error_message
    self.completed_requests[request_id] = request
    self.stats['requests_failed'] += 1
async def _mark_request_timeout(self, request_id: str):
"""标记请求超时"""
request = None
@ -375,58 +400,63 @@ class StorageQueueManager:
elif request_id in self.processing_requests:
request = self.processing_requests[request_id]
del self.processing_requests[request_id]
if request:
request.status = RequestStatus.TIMEOUT
request.error_message = "请求超时"
self.completed_requests[request_id] = request
self.stats['requests_timeout'] += 1
async def _retry_request(self, request: StorageRequest):
    """Re-queue a failed request for another attempt and bump its retry count."""
    rid = request.request_id
    request.retry_count += 1
    request.status = RequestStatus.PENDING
    async with self.queue_lock:
        # Move the request back from processing to the pending queue.
        heapq.heappush(self.request_queue, request)
        del self.processing_requests[rid]
        self.pending_requests[rid] = request
        self.stats['queue_size'] = len(self.request_queue)
    logger.info(f"重试请求: {rid}, 第 {request.retry_count} 次重试")
def _get_queue_position(self, request_id: str) -> int:
    """Return the 1-based position of a request in the queue, or -1 if absent."""
    return next(
        (pos for pos, queued in enumerate(self.request_queue, start=1)
         if queued.request_id == request_id),
        -1,
    )
async def _cleanup_completed_requests(self):
"""清理已完成的请求"""
"""
清理已完成的请求兜底机制
注意采用立即清理模式后此方法主要作为兜底保护防止异常情况下的内存泄漏
"""
while not self.shutdown_event.is_set():
try:
await asyncio.sleep(self.cleanup_interval) # 使用配置的清理间隔
current_time = time.time()
cleanup_threshold = self.completed_request_ttl # 使用配置的保留时间
to_remove = []
for request_id, request in self.completed_requests.items():
if current_time - request.created_at > cleanup_threshold:
to_remove.append(request_id)
for request_id in to_remove:
del self.completed_requests[request_id]
if to_remove:
logger.info(f"清理了 {len(to_remove)} 个已完成的请求")
logger.info(
f"兜底清理了 {len(to_remove)} 个未被取走的已完成请求(可能存在未调用 wait_for_result 的情况)")
except Exception as e:
logger.error(f"清理已完成请求异常: {str(e)}")
# 全局队列管理器实例
storage_queue_manager = StorageQueueManager()

View File

@ -61,7 +61,8 @@ class TaskContext:
self.map_id = map_id # 地图ID
self.parent_log_id = None # 当前父日志ID(用于建立层级关系)
self.current_iteration_index = None # 当前迭代索引
self._is_cleaned = False # 标记是否已清理,防止重复清理
def set_current_block(self, block_id: str, block_name: str):
"""
设置当前正在执行的块
@ -396,28 +397,36 @@ class TaskContext:
def cleanup(self) -> None:
"""
清理上下文数据释放内存
用于任务执行完成后释放大型数据结构防止内存泄漏
修复内存泄漏: 任务完成后及时清理大型数据结构
优化彻底置空所有引用而不是仅清空容器
"""
# # 防止重复清理
if self._is_cleaned:
return
try:
# 清理大型字典
if self.variables:
self.variables.clear()
if self.variable_sources:
self.variable_sources.clear()
if self.block_outputs:
self.block_outputs.clear()
if self.outputs:
self.outputs.clear()
# 清理列表
if self.execution_path:
self.execution_path.clear()
# 清空引用
self.input_params = {}
# 彻底清空所有字典和列表(直接置 None而不是 clear()
self.variables = None
self.variable_sources = None
self.block_outputs = None
self.outputs = None
self.execution_path = None
self.input_params = None
self.error = None
logger.debug(f"任务上下文 {self.task_record_id} 已清理")
# 清理其他可能的大对象引用
self.token = None
self.map_id = None
self.block_record_id = None
self.skip_to_component_id = None
self.failure_reason = None
# 清理 ID 和名称引用
self.current_block_id = None
self.current_block_name = None
self._is_cleaned = True
logger.debug(f"任务上下文 {self.task_record_id} 已彻底清理")
except Exception as e:
logger.error(f"清理任务上下文失败: {str(e)}")

View File

@ -38,7 +38,7 @@ class TaskExecutor:
def __init__(self, task_record_id: str):
"""
初始化任务执行器
Args:
task_record_id: 任务记录ID
"""
@ -52,6 +52,7 @@ class TaskExecutor:
self.timeout = 3600*10 # 默认超时时间10小时
self.is_canceled = False
self.is_error = False
self._is_cleaned = False # 防止重复清理标志
def set_timeout(self, timeout_seconds: int) -> None:
"""
@ -215,7 +216,7 @@ class TaskExecutor:
task_detail = json.loads(task_detail_str)
root_block = task_detail.get("rootBlock", {})
release_sites = self.task_def.release_sites
print("root_block:::::::::::", root_block)
# print("root_block:::::::::::", root_block)
# 更新任务状态为执行中
async with get_async_session() as session:
await self._update_task_status(session, TaskStatus.RUNNING, "任务执行中", task_detail=task_detail_str)
@ -384,22 +385,9 @@ class TaskExecutor:
finally:
self.is_running = False
# 清理任务执行器,释放内存
# 统一调用 cleanup() 方法清理所有资源
try:
if self.task_context:
if hasattr(self.task_context, 'cleanup'):
self.task_context.cleanup()
self.task_context = None
if self.block_executor:
self.block_executor.task_context = None
self.block_executor = None
# 清空任务记录引用
self.task_record = None
self.task_def = None
logger.debug(f"任务执行器 {self.task_record_id} 已清理")
self.cleanup()
except Exception as cleanup_error:
logger.error(f"清理任务执行器失败: {str(cleanup_error)}")
@ -624,4 +612,51 @@ class TaskExecutor:
except Exception as e:
logger.error(f"外部API同步失败: {str(e)}")
# 外部API失败不应该影响主任务流程只记录日志
# 外部API失败不应该影响主任务流程只记录日志
def cleanup(self) -> None:
    """
    Tear down this TaskExecutor and everything it owns.

    Cleanup order matters: ``block_executor`` is released before
    ``task_context`` because the executor holds a reference to the
    context. Idempotent — repeated calls are no-ops.
    """
    if self._is_cleaned:
        return
    try:
        # 1) Block executor first (it references the task context).
        executor, self.block_executor = self.block_executor, None
        if executor is not None:
            try:
                if hasattr(executor, 'cleanup'):
                    executor.cleanup()
            except Exception as e:
                logger.error(f"清理 block_executor 失败: {str(e)}")
        # 2) Then the task context itself.
        context, self.task_context = self.task_context, None
        if context is not None:
            try:
                if hasattr(context, 'cleanup'):
                    context.cleanup()
            except Exception as e:
                logger.error(f"清理 task_context 失败: {str(e)}")
        # 3) Drop remaining references and reset state flags.
        self.task_record = None
        self.task_def = None
        self.error_message = None
        self.is_running = False
        self.is_canceled = False
        self.is_error = False
        # Mark cleaned so subsequent calls short-circuit.
        self._is_cleaned = True
        logger.debug(f"TaskExecutor {self.task_record_id} 已彻底清理")
    except Exception as e:
        logger.error(f"清理 TaskExecutor 失败: {str(e)}")

View File

@ -727,10 +727,9 @@ async def set_task_failed(task_id: str, token: str = None) -> Optional[ApiRespon
# 构建请求头
headers = {}
headers[TFApiConfig.TOKEN_HEADER] = token
headers["x-tenant-id"] = "1000"
# headers["x-tenant-id"] = "1000"
try:
logger.info(f"正在设置任务状态为已失败: {task_id}")
timeout = aiohttp.ClientTimeout(total=TFApiConfig.TIMEOUT, connect=5)
async with aiohttp.ClientSession(timeout=timeout, trust_env=False) as session:
async with session.put(