修复内存溢出问题

This commit is contained in:
靳中伟 2025-10-19 15:33:54 +08:00
parent 23b7b99dc6
commit 3f578c03e8
21 changed files with 15003 additions and 1005 deletions

View File

@ -245,10 +245,10 @@ class BaseConfig(BaseSettings):
DB_NAME: str = Field(default=_db_config['database'], env="DB_NAME")
DB_CHARSET: str = Field(default=_db_config['charset'], env="DB_CHARSET")
DB_ECHO: bool = False # 是否输出SQL语句
DB_POOL_SIZE: int = 50 # 增加连接池基础大小
DB_MAX_OVERFLOW: int = 100 # 增加溢出连接数
DB_POOL_RECYCLE: int = 1800 # 减少连接回收时间,防止连接过期
DB_POOL_TIMEOUT: int = 60 # 获取连接的超时时间
DB_POOL_SIZE: int = 10 # 连接池基础大小优化从50降到10节省内存
DB_MAX_OVERFLOW: int = 20 # 溢出连接数优化从100降到20节省内存
DB_POOL_RECYCLE: int = 1800 # 连接回收时间30分钟,防止连接过期
DB_POOL_TIMEOUT: int = 30 # 获取连接的超时时间优化从60降到30秒
DB_POOL_PRE_PING: bool = True # 连接前检测连接可用性
# Redis配置

View File

@ -62,7 +62,7 @@ TASK_TYPE_AREA={
"LG": "AGW/PL"
}
# 从环境变量读取配置,或使用默认值
TF_API_BASE_URL = os.getenv("TF_API_BASE_URL", "http://192.168.189.206:8080/jeecg-boot")
TF_API_BASE_URL = os.getenv("TF_API_BASE_URL", "http://192.168.189.187:8080/jeecg-boot")
# TF_API_BASE_URL = os.getenv("TF_API_BASE_URL", "http://111.231.146.230:4080/jeecg-boot")
TF_API_TIMEOUT = int(os.getenv("TF_API_TIMEOUT", "10")) # 减少超时时间从60秒到10秒
TF_API_RETRY_TIMES = int(os.getenv("TF_API_RETRY_TIMES", "3"))

View File

@ -0,0 +1,89 @@
# 选择机器人块输出参数立即更新说明
## 修改背景
选择机器人块CSelectAgvBp和机器人执行动作块CAgvOperationBp是嵌套关系。之前的实现中虽然选择机器人块在选择完成后状态就显示为成功但是输出内容选择出的机器人结果要等到整个嵌套的动作块执行完成后才会更新到表中在任务记录详情里才能显示出来。这不符合实际的显示需求。
## 修改内容
### 1. 修改 `RobotBlockHandler.update_block_record` 方法
**文件**: `services/execution/handlers/robot_scheduling.py`
**修改点**:
- 增加 `block_name``output_data` 参数
- 在选择机器人成功后,立即更新块记录的输出参数到数据库
- 构建完整的输出结构:`{"blocks": {"块名称": {"selectedAgvId": "机器人名称"}}}`
- 同时更新 `output_params``block_out_params_value` 字段
### 2. 修改选择机器人块的调用逻辑
**文件**: `services/execution/handlers/robot_scheduling.py`
**修改点**:
- 在选择机器人成功后第897-902行立即调用 `update_block_record` 方法
- 传入块名称和输出数据(包含选择的机器人名称)
- 这样用户可以立即在任务记录详情中看到选择的机器人
## 执行流程
### 修改前的流程
1. 选择机器人块开始执行
2. 调用天风系统接口选择机器人
3. 等待机器人选择完成
4. 更新块记录状态为成功(但不更新输出参数)
5. 执行嵌套的动作块
6. **等待所有子块执行完成**
7. **最后才更新输出参数到数据库**
8. 用户才能看到选择的机器人
### 修改后的流程
1. 选择机器人块开始执行
2. 调用天风系统接口选择机器人
3. 等待机器人选择完成
4. **立即更新块记录状态为成功,并更新输出参数到数据库**
5. **用户立即可以在任务记录详情中看到选择的机器人**
6. 执行嵌套的动作块
7. 所有子块执行完成后,再次更新输出参数(内容相同,不影响显示)
## 技术细节
### 输出参数格式
```json
{
"blocks": {
"块名称": {
"selectedAgvId": "机器人名称"
}
}
}
```
### 数据库字段
- `output_params`: 完整的输出结构(包含 blocks 层级)
- `block_out_params_value`: 输出参数值(只包含选择的机器人信息)
## 注意事项
1. **输出参数会被更新两次**:第一次在选择机器人完成时立即更新,第二次在所有子块执行完成后再次更新。两次更新的内容相同,不会造成问题。
2. **不影响子块执行**:立即更新输出参数不会影响子块的执行逻辑,子块仍然可以正常访问 context 中的机器人信息。
3. **兼容性**这个修改不会影响其他类型的块只针对选择机器人块CSelectAgvBp
## 测试建议
1. 启动一个包含选择机器人块和嵌套动作块的任务
2. 在选择机器人完成后,立即查询任务记录详情
3. 验证能否看到选择的机器人信息
4. 等待动作块执行完成后,再次查询任务记录详情
5. 验证机器人信息保持一致
## 相关文件
- `services/execution/handlers/robot_scheduling.py` - 选择机器人块处理器
- `services/execution/block_executor.py` - 块执行器
- `data/models/blockrecord.py` - 块记录数据模型
## 修改日期
2025-10-17

File diff suppressed because it is too large Load Diff

6247
logs/app.log.2025-10-16 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -9,9 +9,10 @@
import json
import asyncio
import aiohttp
from typing import Dict, Any
from typing import Dict, Any, Set, Optional
from fastapi import APIRouter, Body, Request, Path
from routes.model.external_task_model import ExternalTaskRequest, ExternalTaskResponse, TaskTypeEnum, GenAgvSchedulingTaskRequest, CancelTaskRequest
from routes.model.external_task_model import ExternalTaskRequest, ExternalTaskResponse, TaskTypeEnum, \
GenAgvSchedulingTaskRequest, CancelTaskRequest
from routes.model.task_edit_model import TaskEditRunRequest, TaskInputParamNew, InputParamType
from services.task_edit_service import TaskEditService
from services.external_task_record_service import ExternalTaskRecordService
@ -19,10 +20,10 @@ from services.task_record_service import TaskRecordService
from services.sync_service import set_task_terminated, get_login_token, refresh_token_if_needed
from routes.common_api import format_response, error_response
from utils.logger import get_logger
from utils.background_task_manager import create_background_task
from data.enum.task_record_enum import SourceType, TaskStatus
from data.models.external_task_record import ExternalTaskStatusEnum
from config.tf_api_config import TF_API_TOKEN, TF_API_BASE_URL, CM_ID, DG_ID, TASK_TYPE_PRIORITY, TASK_TYPE_AREA, TF_WEB_POST, sync_disabled_label
from config.tf_api_config import TF_API_TOKEN, TF_API_BASE_URL, CM_ID, DG_ID, TASK_TYPE_PRIORITY, TASK_TYPE_AREA, \
TF_WEB_POST, sync_disabled_label
# 创建路由
router = APIRouter(
@ -33,15 +34,130 @@ router = APIRouter(
# 设置日志
logger = get_logger("app.external_task_api")
# 后台监控任务集合,用于追踪和清理
_background_monitor_tasks: Set[asyncio.Task] = set()
# 全局HTTP会话用于复用连接池
_global_http_session: Optional[aiohttp.ClientSession] = None
_session_lock = asyncio.Lock() # 确保线程安全
async def get_http_session() -> aiohttp.ClientSession:
    """
    Return the global HTTP session, creating it on first use.

    A single shared ``aiohttp.ClientSession`` with a bounded connection
    pool is reused across all outbound calls, which reduces memory usage
    and the number of open TCP connections (the memory-leak fix this
    module exists for). Creation is guarded by ``_session_lock`` so
    concurrent callers cannot create duplicate sessions.

    Returns:
        aiohttp.ClientSession: the shared session object.
    """
    global _global_http_session
    async with _session_lock:
        # Recreate the session if it was never created or was closed
        # (e.g. after close_http_session() during shutdown/restart).
        if _global_http_session is None or _global_http_session.closed:
            # Connector = the connection pool configuration.
            connector = aiohttp.TCPConnector(
                limit=100,                  # total simultaneous connections
                limit_per_host=30,          # per-host connection cap
                ttl_dns_cache=300,          # DNS cache lifetime (seconds)
                force_close=False,          # keep-alive: allow connection reuse
                enable_cleanup_closed=True  # reap half-closed transports
            )
            # Default timeouts applied to every request made on this session.
            timeout = aiohttp.ClientTimeout(
                total=60,      # whole-request deadline (seconds)
                connect=10,    # TCP connect deadline
                sock_read=30   # per-read deadline
            )
            _global_http_session = aiohttp.ClientSession(
                connector=connector,
                timeout=timeout,
                raise_for_status=False  # callers inspect response.status themselves
            )
            logger.info("创建全局HTTP会话启用连接池复用")
    return _global_http_session
async def close_http_session() -> None:
    """
    Close the global HTTP session (call once on application shutdown).

    Fix: acquire ``_session_lock`` for the whole check/close/rebind
    sequence. The original closed and re-assigned ``_global_http_session``
    without the lock, so a concurrent ``get_http_session()`` could observe
    a half-closed session or recreate one mid-close.
    """
    global _global_http_session
    async with _session_lock:
        if _global_http_session and not _global_http_session.closed:
            await _global_http_session.close()
            logger.info("已关闭全局HTTP会话")
            _global_http_session = None
            # Brief grace period so the underlying SSL/TCP transports can
            # finish closing before the event loop shuts down (aiohttp
            # recommends this pattern on session close).
            await asyncio.sleep(0.25)
def _cleanup_background_task(task: asyncio.Task) -> None:
    """
    Done-callback attached to every background monitor task.

    Removes the finished task from the tracking set (so the set cannot
    grow without bound) and logs how the task ended: cancelled, failed
    with an exception, or completed normally.

    Args:
        task: the finished asyncio task being reaped.
    """
    _background_monitor_tasks.discard(task)
    remaining = len(_background_monitor_tasks)
    try:
        if task.cancelled():
            logger.debug(f"后台监控任务已取消,已清理。当前活跃任务数: {remaining}")
            return
        exc = task.exception()
        if exc:
            logger.error(
                f"后台监控任务异常结束: {exc},已清理。当前活跃任务数: {remaining}")
        else:
            logger.debug(f"后台监控任务正常完成,已清理。当前活跃任务数: {remaining}")
    except Exception as e:
        # Never let a done-callback raise back into the event loop.
        logger.error(f"清理后台任务时出错: {str(e)}")
def get_active_monitor_tasks_count() -> int:
    """
    Report how many background monitor tasks are currently being tracked.

    Exposed for the monitoring/debug endpoint; reads the module-level
    tracking set without mutating it.

    Returns:
        int: number of active (not yet reaped) monitor tasks.
    """
    return len(_background_monitor_tasks)
async def cancel_all_monitor_tasks() -> int:
    """
    Cancel every tracked background monitor task (application shutdown only).

    Snapshots the tracking set (so done-callbacks removing entries during
    iteration cannot interfere), requests cancellation of every task that
    is still running, then waits for all of them to settle.

    Returns:
        int: number of tasks that were tracked when cancellation started.
    """
    # Snapshot to avoid mutating the set while iterating it.
    pending = list(_background_monitor_tasks)
    count = len(pending)
    if not pending:
        return count
    logger.info(f"正在取消 {count} 个活跃的监控任务...")
    for t in pending:
        if not t.done():
            t.cancel()
    # Wait for every task to acknowledge cancellation; return_exceptions
    # swallows both CancelledError and any task exceptions here.
    await asyncio.gather(*pending, return_exceptions=True)
    logger.info(f"已取消所有监控任务,共 {count} 个")
    return count
# 外部回调接口URL
EXTERNAL_CALLBACK_URL = "http://roh.vwfawedl.mobi:9001/AGVService/ContainerSendBackRequest" # 生产线到毛坯库任务
AGV_GOODS_MOVE_URL = "http://roh.vwfawedl.mobi:9001/AGVService/HUGoodsMoveRequest" # 毛坯库到产线任务
EXTERNAL_CALLBACK_URL = "http://roh.vwfawedl.mobi:9001/AGVService/ContainerSendBackRequest" # 生产线到毛坯库任务
AGV_GOODS_MOVE_URL = "http://roh.vwfawedl.mobi:9001/AGVService/HUGoodsMoveRequest" # 毛坯库到产线任务
async def get_tf_api_token() -> str:
"""
获取TF API Token优先使用动态获取的token失败时使用默认值
Returns:
str: 可用的API token
"""
@ -53,7 +169,7 @@ async def get_tf_api_token() -> str:
return token
except Exception as e:
logger.warning(f"获取动态token失败: {str(e)}")
# 如果获取失败使用配置中的默认token
logger.info("使用默认配置中的token")
return TF_API_TOKEN
@ -61,7 +177,7 @@ async def get_tf_api_token() -> str:
async def call_external_callback(arrival_no: str, arrival_user: str = "000307") -> bool:
"""
调用外部回调接口
调用外部回调接口使用全局Session复用连接池
Args:
arrival_no: 到货编号ReqCode
@ -80,23 +196,24 @@ async def call_external_callback(arrival_no: str, arrival_user: str = "000307")
while retry_count < max_retries:
try:
async with aiohttp.ClientSession() as session:
async with session.post(EXTERNAL_CALLBACK_URL, json=payload) as response:
result = await response.json()
logger.info(f"外部接口调用响应: {result}, arrival_no={arrival_no}, 重试次数={retry_count}")
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.post(EXTERNAL_CALLBACK_URL, json=payload) as response:
result = await response.json()
logger.info(f"外部接口调用响应: {result}, arrival_no={arrival_no}, 重试次数={retry_count}")
# 检查响应结果
if result.get("result") == "0":
logger.info(f"外部接口调用成功: arrival_no={arrival_no}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"外部接口返回result=1继续重试: arrival_no={arrival_no}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"外部接口返回异常结果: {result}, arrival_no={arrival_no}")
retry_count += 1
await asyncio.sleep(5)
# 检查响应结果
if result.get("result") == "0":
logger.info(f"外部接口调用成功: arrival_no={arrival_no}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"外部接口返回result=1继续重试: arrival_no={arrival_no}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"外部接口返回异常结果: {result}, arrival_no={arrival_no}")
retry_count += 1
await asyncio.sleep(5)
except Exception as e:
logger.error(f"调用外部接口异常: {str(e)}, arrival_no={arrival_no}, 重试次数={retry_count}")
@ -106,9 +223,10 @@ async def call_external_callback(arrival_no: str, arrival_user: str = "000307")
logger.error(f"外部接口调用失败,已达到最大重试次数: arrival_no={arrival_no}, 最大重试次数={max_retries}")
return False
async def call_agv_goods_move_callback(pid: str, user_id: str = "000307") -> bool:
"""
调用AGV货物移动回调接口
调用AGV货物移动回调接口使用全局Session复用连接池
Args:
pid: 对应的req_code
@ -127,23 +245,24 @@ async def call_agv_goods_move_callback(pid: str, user_id: str = "000307") -> boo
while retry_count < max_retries:
try:
async with aiohttp.ClientSession() as session:
async with session.post(AGV_GOODS_MOVE_URL, json=payload) as response:
result = await response.json()
logger.info(f"AGV货物移动接口调用响应: {result}, PID={pid}, 重试次数={retry_count}")
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.post(AGV_GOODS_MOVE_URL, json=payload) as response:
result = await response.json()
logger.info(f"AGV货物移动接口调用响应: {result}, PID={pid}, 重试次数={retry_count}")
# 检查响应结果
if result.get("result") == "0":
logger.info(f"AGV货物移动接口调用成功: PID={pid}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"AGV货物移动接口返回result=1继续重试: PID={pid}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"AGV货物移动接口返回异常结果: {result}, PID={pid}")
retry_count += 1
await asyncio.sleep(5)
# 检查响应结果
if result.get("result") == "0":
logger.info(f"AGV货物移动接口调用成功: PID={pid}, 总重试次数={retry_count}")
return True
elif result.get("result") == "1":
logger.info(f"AGV货物移动接口返回result=1继续重试: PID={pid}, 重试次数={retry_count}")
retry_count += 1
await asyncio.sleep(5) # 等待5秒后重试
else:
logger.error(f"AGV货物移动接口返回异常结果: {result}, PID={pid}")
retry_count += 1
await asyncio.sleep(5)
except Exception as e:
logger.error(f"调用AGV货物移动接口异常: {str(e)}, PID={pid}, 重试次数={retry_count}")
@ -153,6 +272,7 @@ async def call_agv_goods_move_callback(pid: str, user_id: str = "000307") -> boo
logger.error(f"AGV货物移动接口调用失败已达到最大重试次数: PID={pid}, 最大重试次数={max_retries}")
return False
async def monitor_task_and_callback(task_record_id: str, req_code: str):
"""
异步监控任务状态并在成功时调用外部回调接口
@ -163,50 +283,54 @@ async def monitor_task_and_callback(task_record_id: str, req_code: str):
"""
logger.info(f"开始监控任务状态: task_record_id={task_record_id}, req_code={req_code}")
# max_wait_time = 1800 # 最大等待时间30分钟
# wait_count = 0
try:
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
logger.info(f"监控任务状态: task_record_id={task_record_id}, status={task_status}")
logger.info(f"监控任务状态: task_record_id={task_record_id}, status={task_status}")
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(
f"任务执行成功,开始调用外部回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用外部回调接口
success = await call_external_callback(req_code)
if success:
logger.info(f"外部回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
else:
logger.error(f"外部回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(f"任务执行成功,开始调用外部回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用外部回调接口
success = await call_external_callback(req_code)
if success:
logger.info(f"外部回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(
f"任务执行失败或取消,不调用外部回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.error(f"外部回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
logger.debug(f"任务仍在执行中,继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2)
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(f"任务执行失败或取消,不调用外部回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.debug(f"任务仍在执行中,继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2) # 等待10秒
# wait_count += 10
logger.warning(f"无法获取任务详情,继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2)
else:
logger.warning(f"无法获取任务详情,继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待10秒
# wait_count += 10
except asyncio.CancelledError:
logger.info(f"监控任务被取消: task_record_id={task_record_id}, req_code={req_code}")
raise # 重新抛出以便正确处理取消
except Exception as e:
logger.error(f"监控任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2)
finally:
# 确保释放所有资源
logger.info(f"监控任务结束,清理资源: task_record_id={task_record_id}, req_code={req_code}")
except Exception as e:
logger.error(f"监控任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待10秒
# wait_count += 10
async def monitor_agv_task_and_callback(task_record_id: str, req_code: str):
"""
@ -218,55 +342,68 @@ async def monitor_agv_task_and_callback(task_record_id: str, req_code: str):
"""
logger.info(f"开始监控AGV调度任务状态: task_record_id={task_record_id}, req_code={req_code}")
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
try:
while True:
try:
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
if task_detail_result.get("success", False):
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
logger.info(f"监控AGV调度任务状态: task_record_id={task_record_id}, status={task_status}")
logger.info(f"监控AGV调度任务状态: task_record_id={task_record_id}, status={task_status}")
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(f"AGV调度任务执行成功开始调用AGV货物移动回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用AGV货物移动回调接口
success = await call_agv_goods_move_callback(req_code)
if success:
logger.info(f"AGV货物移动回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
# 如果任务已完成(成功)
if task_status == TaskStatus.COMPLETED:
logger.info(
f"AGV调度任务执行成功开始调用AGV货物移动回调接口: task_record_id={task_record_id}, req_code={req_code}")
# 调用AGV货物移动回调接口
success = await call_agv_goods_move_callback(req_code)
if success:
logger.info(
f"AGV货物移动回调接口调用成功: task_record_id={task_record_id}, req_code={req_code}")
else:
logger.error(
f"AGV货物移动回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(
f"AGV调度任务执行失败或取消不调用AGV货物移动回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.error(f"AGV货物移动回调接口调用失败: task_record_id={task_record_id}, req_code={req_code}")
break
logger.debug(
f"AGV调度任务仍在执行中继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2)
# 如果任务已失败或取消
elif task_status in [TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.info(f"AGV调度任务执行失败或取消不调用AGV货物移动回调接口: task_record_id={task_record_id}, status={task_status}")
break
# 任务还在运行中,继续等待
else:
logger.debug(f"AGV调度任务仍在执行中继续等待: task_record_id={task_record_id}, status={task_status}")
await asyncio.sleep(2) # 等待2秒
logger.warning(f"无法获取AGV调度任务详情继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2)
else:
logger.warning(f"无法获取AGV调度任务详情继续等待: task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待2秒
except asyncio.CancelledError:
logger.info(f"AGV监控任务被取消: task_record_id={task_record_id}, req_code={req_code}")
raise # 重新抛出以便正确处理取消
except Exception as e:
logger.error(f"监控AGV调度任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2)
finally:
# 确保释放所有资源
logger.info(f"AGV监控任务结束清理资源: task_record_id={task_record_id}, req_code={req_code}")
except Exception as e:
logger.error(f"监控AGV调度任务状态时出现异常: {str(e)}, task_record_id={task_record_id}")
await asyncio.sleep(2) # 等待2秒
async def check_task_permission(tf_api_token: str, tf_api_base_url: str, module_name: str = "其他") -> bool:
"""
检查是否允许处理任务
检查是否允许处理任务使用全局Session复用连接池
调用参数配置-三方接口调用接口检查系统限制
Args:
tf_api_token: API访问令牌
tf_api_base_url: API基础URL
module_name: 模块名称默认为"其他"
Returns:
bool: True表示允许处理任务False表示被限制
"""
@ -274,52 +411,54 @@ async def check_task_permission(tf_api_token: str, tf_api_base_url: str, module_
"X-Access-Token": tf_api_token,
"Content-Type": "text/plain"
}
# 构建 API URL
api_url = f"{tf_api_base_url}/parameter/getByModule"
try:
async with aiohttp.ClientSession() as session:
async with session.get(api_url, data=module_name, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"参数配置接口调用成功: result={result}")
# 检查响应格式
if result.get("success", False):
parameter_result = result.get("result", {})
sync_disabled = parameter_result.get(sync_disabled_label, "false")
# 如果 sync_disabled 为 "true",则被限制
if sync_disabled == "true":
logger.warning("系统限制创建任务: sync_disabled=true")
return False
else:
logger.info("系统允许创建任务: sync_disabled=false")
return True
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.get(api_url, data=module_name, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"参数配置接口调用成功: result={result}")
# 检查响应格式
if result.get("success", False):
parameter_result = result.get("result", {})
sync_disabled = parameter_result.get(sync_disabled_label, "false")
# 如果 sync_disabled 为 "true",则被限制
if sync_disabled == "true":
logger.warning("系统限制创建任务: sync_disabled=true")
return False
else:
# 如果接口调用失败,默认允许处理任务
logger.warning(f"参数配置接口调用失败: {result.get('message', '未知错误')}")
logger.info("系统允许创建任务: sync_disabled=false")
return True
else:
logger.error(f"参数配置接口调用失败: status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
# 如果接口调用失败,默认允许处理任务
logger.warning(f"参数配置接口调用失败: {result.get('message', '未知错误')}")
return True
else:
logger.error(f"参数配置接口调用失败: status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
# 如果接口调用失败,默认允许处理任务
return True
except Exception as e:
logger.error(f"系统接口服务异常: error={str(e)}")
# 如果出现异常,默认允许处理任务
return False
async def get_amr_loading_state(task_record_id: str, tf_api_token: str) -> Dict[str, Any]:
"""
获取任务中小车负载状态
获取任务中小车负载状态使用全局Session复用连接池
Args:
task_record_id: 天风任务ID
tf_api_token: API访问令牌
Returns:
Dict[str, Any]: 包含小车负载状态的响应数据
"""
@ -327,26 +466,27 @@ async def get_amr_loading_state(task_record_id: str, tf_api_token: str) -> Dict[
"X-Access-Token": tf_api_token,
"Content-Type": "application/json"
}
# 构建 API URL
api_url = f"{TF_API_BASE_URL}/task/vwedtask/{task_record_id}/getAmrState"
try:
async with aiohttp.ClientSession() as session:
async with session.get(api_url, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"获取小车负载状态成功: task_record_id={task_record_id}, result={result}")
return result
else:
logger.error(f"获取小车负载状态失败: task_record_id={task_record_id}, status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
return {
"success": False,
"message": f"HTTP {response.status}: {response_text}",
"code": response.status
}
# 使用全局HTTP会话复用连接池
session = await get_http_session()
async with session.get(api_url, headers=headers) as response:
if response.status == 200:
result = await response.json()
logger.info(f"获取小车负载状态成功: task_record_id={task_record_id}, result={result}")
return result
else:
logger.error(f"获取小车负载状态失败: task_record_id={task_record_id}, status={response.status}")
response_text = await response.text()
logger.error(f"响应内容: {response_text}")
return {
"success": False,
"message": f"HTTP {response.status}: {response_text}",
"code": response.status
}
except Exception as e:
logger.error(f"获取小车负载状态异常: task_record_id={task_record_id}, error={str(e)}")
return {
@ -355,6 +495,7 @@ async def get_amr_loading_state(task_record_id: str, tf_api_token: str) -> Dict[
"code": 500
}
# # 任务类型到任务优先级
TASK_TYPE_TEMPLATE_MAPPING = {
TaskTypeEnum.GG2MP: "GG",
@ -393,6 +534,8 @@ TASK_TYPE_REMARK = {
TaskTypeEnum.MP2LG: "毛坯库:{0}-连杆:{1}",
TaskTypeEnum.MP2PHZ: "毛坯库:{0}-平衡轴:{1}",
}
@router.post("/newTask")
async def create_new_task(request: Request, task_request: ExternalTaskRequest = Body(...)):
"""
@ -539,7 +682,7 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
source_device=request.client.host if request.client else "unknown", # 使用客户端IP作为设备标识
use_modbus=False,
modbus_timeout=5000,
priority = priority
priority=priority
)
# 更新外部任务记录状态为运行中
@ -612,7 +755,7 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
message="成功",
rowCount=1
)
except Exception as e:
logger.error(f"创建外部任务异常: {str(e)}, ReqCode={task_request.ReqCode}")
# 如果已创建外部任务记录,更新状态为失败
@ -628,7 +771,7 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
)
except Exception as update_error:
logger.error(f"更新外部任务记录状态失败: {str(update_error)}")
return ExternalTaskResponse(
code=500,
reqCode=task_request.ReqCode,
@ -636,12 +779,13 @@ async def create_new_task(request: Request, task_request: ExternalTaskRequest =
rowCount=0
)
@router.post("/GenAgvSchedulingTask")
async def gen_agv_scheduling_task(request: Request, task_request: GenAgvSchedulingTaskRequest = Body(...)):
"""
AGV调度任务接口
用于生成AGV调度任务
逻辑
1. 根据 taskcode 参数查询 external_task_record 表获取对应的 task_record_id
2. 调用 get_task_record_detail 接口查询任务运行状态
@ -657,11 +801,10 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
priority = TASK_TYPE_PRIORITY.get(TASK_TYPE_TEMPLATE_MAPPING.get(task_request.TaskTyp, "OR"))
remark = TASK_TYPE_REMARK.get(task_request.TaskTyp)
external_record = None
try:
logger.info(f"收到AGV调度任务请求:{task_request}")
# 检查系统是否允许处理任务
tf_api_token = await get_tf_api_token()
is_allowed = await check_task_permission(tf_api_token, TF_API_BASE_URL)
@ -673,12 +816,12 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
message="由于系统限制创建任务失败,请联系管理员",
rowCount=0
)
# 导入数据库相关模块
from data.session import get_async_session
from data.models.operate_point_layer import OperatePointLayer
from sqlalchemy import select
# 验证任务条件已移至脚本处理器中,此处保留简单检查
if not task_request.TaskCode:
return ExternalTaskResponse(
@ -698,16 +841,15 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
}
client_info_str = json.dumps(client_info, ensure_ascii=False)
# 根据任务类型获取对应的模板ID
template_id = DG_ID
# 创建外部任务记录
external_record = await ExternalTaskRecordService.create_agv_scheduling_task_record(
req_code=task_request.ReqCode,
task_code=task_request.TaskCode,
business_task_type=task_request.TaskTyp.value if hasattr(task_request.TaskTyp, 'value') else str(task_request.TaskTyp),
business_task_type=task_request.TaskTyp.value if hasattr(task_request.TaskTyp, 'value') else str(
task_request.TaskTyp),
security_key=task_request.SecurityKey or "",
type_field=task_request.Type,
sub_type=task_request.SubType,
@ -778,7 +920,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
task_params.append(TaskInputParamNew(
name="priority",
type=InputParamType.STRING,
label="优先级",
label="优先级",
required=False,
defaultValue=priority,
remark="选车优先级"
@ -786,7 +928,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
task_params.append(TaskInputParamNew(
name="TASK_CODE",
type=InputParamType.STRING,
label="任务id",
label="任务id",
required=False,
defaultValue=task_request.TaskCode,
remark="创建任务时任务id"
@ -794,7 +936,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
task_params.append(TaskInputParamNew(
name="TASK_TYPE",
type=InputParamType.STRING,
label="任务类型",
label="任务类型",
required=False,
defaultValue=task_request.TaskTyp,
remark="创建任务时任务类型"
@ -816,7 +958,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
source_device=request.client.host if request.client else "unknown", # 使用客户端IP作为设备标识
use_modbus=False,
modbus_timeout=5000,
priority = priority
priority=priority
)
# 更新外部任务记录状态为运行中
@ -885,22 +1027,25 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
)
logger.info(f"AGV调度任务启动成功: ReqCode={task_request.ReqCode}, TaskRecordId={task_record_id}")
# 定义需要监控的任务类型
agv_callback_task_types = ["MP2GG", "MP2GT", "MP2ZG", "MP2QZ", "MP2LG", "MP2PHZ"]
# 启动异步任务监控,不阻塞当前接口 - 使用后台任务管理器
# 启动异步任务监控,不阻塞当前接口
if task_record_id and task_request.TaskTyp in agv_callback_task_types and TF_WEB_POST:
create_background_task(
monitor_agv_task_and_callback(
task_record_id=task_record_id,
req_code=task_request.TaskCode
),
name=f"monitor_agv_task_{task_record_id}",
context=f"TaskType={task_request.TaskTyp}, ReqCode={task_request.TaskCode}"
)
logger.info(f"已启动AGV调度任务监控: TaskType={task_request.TaskTyp}, TaskRecordId={task_record_id}")
# 创建后台监控任务并添加到管理集合
monitor_task = asyncio.create_task(monitor_agv_task_and_callback(
task_record_id=task_record_id,
req_code=task_request.TaskCode
))
# 将任务添加到集合中进行追踪
_background_monitor_tasks.add(monitor_task)
# 添加完成回调,自动清理
monitor_task.add_done_callback(_cleanup_background_task)
logger.info(
f"已启动AGV调度任务监控: TaskType={task_request.TaskTyp}, TaskRecordId={task_record_id}, 当前活跃监控任务数: {len(_background_monitor_tasks)}")
return ExternalTaskResponse(
code=0,
reqCode=task_request.TaskCode,
@ -923,7 +1068,7 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
)
except Exception as update_error:
logger.error(f"更新外部任务记录状态失败: {str(update_error)}")
return ExternalTaskResponse(
code=500,
reqCode=task_request.ReqCode,
@ -931,21 +1076,22 @@ async def gen_agv_scheduling_task(request: Request, task_request: GenAgvScheduli
rowCount=0
)
@router.post("/cancelTask")
async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body(...)):
"""
取消任务接口
根据ReqCode查询对应的task_record_id然后调用内部接口终止任务并通知主系统
Args:
cancel_request: 取消任务请求包含ReqCode
Returns:
ExternalTaskResponse: 包含codereqCodemessagerowCount的响应
"""
try:
logger.info(f"收到取消任务请求: {cancel_request}")
# 检查系统是否允许处理任务
tf_api_token = await get_tf_api_token()
is_allowed = await check_task_permission(tf_api_token, TF_API_BASE_URL)
@ -957,9 +1103,9 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="由于系统限制创建任务失败,请联系管理员",
rowCount=0
)
req_code = cancel_request.ReqCode
# 根据req_code查询external_task_record获取task_record_id
external_record = await ExternalTaskRecordService.get_external_task_record(req_code)
if not external_record:
@ -970,7 +1116,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="未找到对应的任务记录",
rowCount=0
)
task_record_id = external_record.task_record_id
if not task_record_id:
logger.error(f"外部任务记录中没有关联的task_record_id: ReqCode={req_code}")
@ -980,7 +1126,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="任务记录中没有关联的内部任务ID",
rowCount=0
)
# 通过task_record_id查询任务详情检查任务状态
task_detail_result = await TaskRecordService.get_task_record_detail(task_record_id)
if not task_detail_result.get("success", False):
@ -991,10 +1137,10 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="获取任务详情失败",
rowCount=0
)
task_detail = task_detail_result.get("data", {})
task_status = task_detail.get("status", "")
# 检查任务状态,只有运行状态的任务才允许取消
if task_status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELED]:
logger.warning(f"任务已处于终止状态,无法取消: ReqCode={req_code}, TaskStatus={task_status}")
@ -1004,18 +1150,19 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message=f"任务已处于{task_status}状态,无法取消",
rowCount=0
)
# 检查小车负载状态
logger.info(f"检查小车负载状态: task_record_id={task_record_id}")
amr_state_result = await get_amr_loading_state(task_record_id, tf_api_token)
if amr_state_result.get("success", False):
amr_state_data = amr_state_result.get("result", {})
amr_loading = amr_state_data.get("amr_loading", False)
amr_name = amr_state_data.get("amr_name", "")
logger.info(f"小车负载状态: task_record_id={task_record_id}, amr_loading={amr_loading}, amr_name={amr_name}")
logger.info(
f"小车负载状态: task_record_id={task_record_id}, amr_loading={amr_loading}, amr_name={amr_name}")
# 如果小车处于负载状态,不允许取消任务
if amr_loading:
logger.warning(f"小车处于负载状态,不允许终止任务: ReqCode={req_code}, AMR={amr_name}")
@ -1028,15 +1175,15 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
else:
# 如果获取小车状态失败,记录警告但继续执行取消操作
logger.warning(f"获取小车负载状态失败,继续执行取消操作: {amr_state_result.get('message', '')}")
# 调用内部接口停止任务
logger.info(f"调用内部接口停止任务: task_record_id={task_record_id}")
stop_result = await TaskRecordService.stop_task_record(task_record_id)
if not stop_result.get("success", False):
error_msg = stop_result.get("message", "停止任务失败")
logger.error(f"停止任务失败: {error_msg}, task_record_id={task_record_id}")
# 检查是否是"已载货,请人工干预"的情况
if "已载货" in error_msg or "人工干预" in error_msg:
return ExternalTaskResponse(
@ -1045,23 +1192,23 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="已载货,请人工干预",
rowCount=0
)
return ExternalTaskResponse(
code=1,
reqCode=req_code,
message=error_msg,
rowCount=0
)
# 通知主系统任务已终止
logger.info(f"通知主系统任务已终止: task_record_id={task_record_id}")
try:
await set_task_terminated(task_record_id, tf_api_token)
logger.info(f"成功通知主系统任务已终止: task_record_id={task_record_id}")
except Exception as sync_error:
logger.warning(f"通知主系统失败,但任务已成功取消: {str(sync_error)}, task_record_id={task_record_id}")
# 更新外部任务记录状态为已取消
await ExternalTaskRecordService.update_task_record_status(
req_code=external_record.id,
@ -1070,7 +1217,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
response_message="任务已取消",
response_row_count=0
)
logger.info(f"任务取消成功: ReqCode={req_code}, TaskRecordId={task_record_id}")
return ExternalTaskResponse(
code=0,
@ -1078,7 +1225,7 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
message="成功",
rowCount=0
)
except Exception as e:
logger.error(f"取消任务异常: {str(e)}, ReqCode={cancel_request.ReqCode}")
return ExternalTaskResponse(
@ -1091,27 +1238,27 @@ async def cancel_task(request: Request, cancel_request: CancelTaskRequest = Body
@router.get("/api/external-task-record/by-req-code/{req_code}")
async def get_external_task_record_by_req_code(
req_code: str = Path(..., description="请求标识码")
req_code: str = Path(..., description="请求标识码")
):
"""
根据ReqCode查询外部任务记录
Args:
req_code: 请求标识码
Returns:
包含外部任务记录信息的响应
"""
try:
# 查询外部任务记录
external_record = await ExternalTaskRecordService.get_external_task_record(req_code)
if not external_record:
return error_response(
message=f"未找到ReqCode为 {req_code} 的外部任务记录",
code=404
)
# 构建响应数据
response_data = {
"id": external_record.id,
@ -1133,15 +1280,42 @@ async def get_external_task_record_by_req_code(
"created_at": external_record.created_at.isoformat() if external_record.created_at else None,
"updated_at": external_record.updated_at.isoformat() if external_record.updated_at else None
}
return format_response(
data=response_data,
message="成功获取外部任务记录"
)
except Exception as e:
logger.error(f"查询外部任务记录异常: {str(e)}, req_code={req_code}")
return error_response(
message=f"查询外部任务记录失败: {str(e)}",
code=500
)
@router.get("/api/monitor-tasks/status")
async def get_monitor_tasks_status():
    """
    Report the status of background monitor tasks (debug/observability endpoint).

    Returns:
        A formatted response carrying the number of currently active
        background monitor tasks, or an error response on failure.
    """
    try:
        count = get_active_monitor_tasks_count()
        payload = {
            "activeMonitorTasks": count,
            "message": f"当前有 {count} 个活跃的后台监控任务",
        }
        return format_response(data=payload, message="成功获取监控任务状态")
    except Exception as exc:
        logger.error(f"获取监控任务状态异常: {str(exc)}")
        return error_response(
            message=f"获取监控任务状态失败: {str(exc)}",
            code=500
        )

View File

@ -975,26 +975,46 @@ class EnhancedTaskScheduler:
source_client_info=source_client_info
)
def _cleanup_executor(self, executor: Optional['TaskExecutor'], context: str = "") -> None:
    """
    Release resources held by an executor to prevent memory leaks.

    Args:
        executor: The task executor to clean up; ``None`` is a no-op.
        context: Free-form text prepended to log lines for traceability.
    """
    if executor is None:
        return
    try:
        cleanup = getattr(executor, 'cleanup', None)
        if callable(cleanup):
            cleanup()
        logger.debug(f"{context} executor已清理")
    except Exception as e:
        # Cleanup is best-effort; never let it propagate into the worker loop.
        logger.error(f"{context} 清理executor异常: {str(e)}")
async def _worker(self, worker_id: int) -> None:
"""
工作线程
从队列中获取任务并执行
Args:
worker_id: 工作线程ID
"""
logger.info(f"工作线程 {worker_id} 启动")
while self.is_running:
executor = None # 在外层定义,确保异常处理时可访问
task_record_id = None
queue_index = -1
cancel_checker_task = None
try:
# 从队列获取任务
queue_index, item = await self.queue_manager.dequeue(worker_id, self.worker_manager.get_worker_count())
# 如果没有任务,继续等待
if queue_index == -1 or item is None:
await asyncio.sleep(0.1) # 短暂休眠
continue
# 解析优先级和任务ID
if isinstance(item, tuple) and len(item) == 2:
priority, task_record_id = item
@ -1004,43 +1024,35 @@ class EnhancedTaskScheduler:
# 兼容旧格式
task_record_id = item
priority = PeriodicTaskStatus.PERIODIC
# 更新工作线程状态
self.worker_manager.update_worker_status(worker_id, {
"current_task": task_record_id,
"task_priority": priority,
"task_start_time": datetime.now()
})
logger.info(f"工作线程 {worker_id} 获取到任务: {task_record_id}, 优先级: {priority}")
# 执行任务
# 创建任务执行器
executor = TaskExecutor(task_record_id)
# 设置超时时间
executor.set_timeout(self.task_timeout)
# 记录到正在执行的任务
self.running_tasks[task_record_id] = executor
# 从持久化管理器中移除(正在执行的任务不需要持久化)
await self.persistence_manager.remove_task(task_record_id)
# 创建一个取消任务检查器,定期检查数据库中任务是否被标记为取消
cancel_checker_task = asyncio.create_task(self._check_task_cancel(task_record_id, executor))
try:
# 创建任务执行器
executor = TaskExecutor(task_record_id)
# 设置超时时间
executor.set_timeout(self.task_timeout)
# 记录到正在执行的任务
self.running_tasks[task_record_id] = executor
# 从持久化管理器中移除(正在执行的任务不需要持久化)
await self.persistence_manager.remove_task(task_record_id)
# 创建一个取消任务检查器,定期检查数据库中任务是否被标记为取消
cancel_checker_task = asyncio.create_task(self._check_task_cancel(task_record_id, executor))
# 执行任务
result = await executor.execute()
# 取消检查器任务
cancel_checker_task.cancel()
try:
await cancel_checker_task
except asyncio.CancelledError:
pass
# 更新工作线程状态
self.worker_manager.update_worker_status(worker_id, {
"current_task": None,
@ -1049,62 +1061,12 @@ class EnhancedTaskScheduler:
"task_count": self.worker_manager.worker_status[worker_id].get("task_count", 0) + 1
})
# 显式清理executor对象释放内存
try:
if executor:
# 清理TaskContext
if hasattr(executor, 'task_context') and executor.task_context:
if hasattr(executor.task_context, 'cleanup'):
executor.task_context.cleanup()
executor.task_context = None
# 清理BlockExecutor
if hasattr(executor, 'block_executor') and executor.block_executor:
executor.block_executor.task_context = None
executor.block_executor = None
# 清空其他引用
executor.task_record = None
executor.task_def = None
logger.debug(f"任务 {task_record_id} 的executor已清理")
except Exception as cleanup_error:
logger.error(f"清理executor失败: {str(cleanup_error)}")
# 移除正在执行的任务
self.running_tasks.pop(task_record_id, None)
logger.info(f"工作线程 {worker_id} 完成任务: {task_record_id}, 结果: {result.get('success')}")
except Exception as e:
logger.error(f"工作线程 {worker_id} 执行任务异常: {str(e)}")
logger.error(traceback.format_exc())
# 显式清理executor对象释放内存
try:
if executor:
# 清理TaskContext
if hasattr(executor, 'task_context') and executor.task_context:
if hasattr(executor.task_context, 'cleanup'):
executor.task_context.cleanup()
executor.task_context = None
# 清理BlockExecutor
if hasattr(executor, 'block_executor') and executor.block_executor:
executor.block_executor.task_context = None
executor.block_executor = None
# 清空其他引用
executor.task_record = None
executor.task_def = None
logger.debug(f"任务 {task_record_id} 的executor已清理(异常分支)")
except Exception as cleanup_error:
logger.error(f"清理executor失败(异常分支): {str(cleanup_error)}")
# 移除正在执行的任务
self.running_tasks.pop(task_record_id, None)
# 检查是否需要重试
await self._handle_task_error(task_record_id, str(e))
@ -1116,30 +1078,54 @@ class EnhancedTaskScheduler:
"last_error": str(e),
"task_count": self.worker_manager.worker_status[worker_id].get("task_count", 0) + 1
})
finally:
# 统一清理逻辑,无论成功或失败都会执行
# 取消检查器任务
if cancel_checker_task:
cancel_checker_task.cancel()
try:
await cancel_checker_task
except asyncio.CancelledError:
pass
# 移除正在执行的任务
if task_record_id:
self.running_tasks.pop(task_record_id, None)
# 清理executor对象防止内存泄漏
self._cleanup_executor(executor, f"工作线程 {worker_id}")
executor = None
# 标记任务完成
self.queue_manager.task_done(queue_index)
if queue_index != -1:
self.queue_manager.task_done(queue_index)
# 更新工作线程心跳
self.worker_manager.update_worker_heartbeat(worker_id)
except asyncio.TimeoutError:
# 超时,继续下一次循环
continue
except asyncio.CancelledError:
# 取消异常,退出循环
logger.info(f"工作线程 {worker_id} 被取消")
# 清理可能存在的executor
self._cleanup_executor(executor, f"工作线程 {worker_id} 取消时")
break
except Exception as e:
logger.error(f"工作线程 {worker_id} 异常: {str(e)}")
logger.error(traceback.format_exc())
# 清理可能存在的executor
self._cleanup_executor(executor, f"工作线程 {worker_id} 异常时")
# 更新工作线程状态
self.worker_manager.update_worker_status(worker_id, {
"error": str(e),
"error_time": datetime.now()
})
# 短暂休眠,避免频繁错误
await asyncio.sleep(1.0)

View File

@ -36,13 +36,14 @@ class BlockExecutor:
def __init__(self, task_context: TaskContext):
"""
初始化块执行器
Args:
task_context: 任务上下文
"""
self.task_context = task_context
self.is_canceled = False
self.is_error = False
self._is_cleaned = False # 防止重复清理标志
def cancel(self) -> None:
"""
@ -168,12 +169,24 @@ class BlockExecutor:
return result
else:
# 更新块状态为失败
error_msg = result.get("message", "执行失败")
await self._update_block_record(block_record_id, TaskBlockRecordStatus.FAILED, error_msg)
# 检查是否是"自身成功但子块失败"的特殊情况
if result.get("self_success", False):
# 本块执行成功,更新状态为成功
success_msg = result.get("message", "执行成功")
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.SUCCESS,
success_msg,
result.get("output", {})
)
logger.info(f"{block_name} 自身执行成功,但子块失败")
else:
# 更新块状态为失败
error_msg = result.get("message", "执行失败")
await self._update_block_record(block_record_id, TaskBlockRecordStatus.FAILED, error_msg)
# 设置错误信息
self.task_context.set_error(error_msg, block_id)
# 设置错误信息(无论哪种情况,任务都要标记为失败)
self.task_context.set_error(result.get("message", "执行失败"), block_id)
return result
except Exception as e:
@ -448,13 +461,27 @@ class BlockExecutor:
}
else:
logger.error(f"子块 {child_id} 执行失败: {result.get('message', '未知错误')}")
# 更新块记录状态为失败
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.FAILED, # 失败
result.get("message", "执行失败"),
)
# 检查是否是"自身成功但子块失败"的特殊情况
if result.get("self_success", False):
# 本块执行成功,更新状态为成功
success_msg = result.get("message", "执行成功")
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.SUCCESS,
success_msg,
result.get("output", {})
)
logger.info(f"子块 {child_name} 自身执行成功,但其子块失败")
else:
# 更新块记录状态为失败
await self._update_block_record(
block_record_id,
TaskBlockRecordStatus.FAILED, # 失败
result.get("message", "执行失败"),
)
# 无论哪种情况,都需要终止后续兄弟块的执行
# 为剩余未执行的兄弟块创建终止记录
await self._create_terminated_records_for_remaining_siblings(
children, i + 1, f"因前序块 {child_name} 执行失败而终止"
@ -1833,4 +1860,31 @@ class BlockExecutor:
params = params_str.split(",")
return params
return params
def cleanup(self) -> None:
    """
    Release resources held by this BlockExecutor.

    Drops the reference to ``task_context`` to break reference cycles that
    would otherwise leak memory. ``task_context.cleanup()`` is deliberately
    NOT called here — the owning TaskExecutor is responsible for that.
    Idempotent: only the first call has any effect.
    """
    if self._is_cleaned:
        return
    try:
        # Only sever the reference; the TaskExecutor owns context cleanup.
        self.task_context = None
        # Reset state flags and mark this instance as cleaned.
        self.is_canceled = False
        self.is_error = False
        self._is_cleaned = True
        logger.debug("BlockExecutor 已清理")
    except Exception as e:
        logger.error(f"清理 BlockExecutor 失败: {str(e)}")

View File

@ -341,14 +341,15 @@ class RobotBlockHandler(BlockHandler):
logger.info(f"成功更新任务记录 {task_record_id} 的agv_id字段: {final_agv_id}")
except Exception as e:
logger.error(f"更新任务记录 {task_record_id} 的agv_id字段时发生错误: {str(e)}")
async def update_block_record(self, block_record_id: str, agv_id: str = None) -> None:
async def update_block_record(self, block_record_id: str, agv_id: str = None, block_name: str = None, output_data: Dict[str, Any] = None) -> None:
"""
更新块记录的通用方法
更新块记录的通用方法立即更新输出参数
Args:
block_record_id: 块记录ID
status: 状态码
message: 消息
agv_id: 选择的机器人名称
block_name: 块名称
output_data: 输出数据
"""
try:
from sqlalchemy.ext.asyncio import AsyncSession
@ -356,18 +357,54 @@ class RobotBlockHandler(BlockHandler):
from data.models.blockrecord import VWEDBlockRecord
from sqlalchemy import select, update
from data.enum.task_block_record_enum import TaskBlockRecordStatus
from data.enum.task_input_param_enum import TaskInputParamVariables
from datetime import datetime
if not block_record_id:
logger.warning(f"未提供块记录ID无法更新块记录")
return
stmt = update(VWEDBlockRecord).where(VWEDBlockRecord.id == block_record_id).values(
status=TaskBlockRecordStatus.SUCCESS,
ended_reason="执行成功",
remark="执行成功"
)
async with get_async_session() as session:
session: AsyncSession = session
# 获取块记录以获取块名称
stmt_select = select(VWEDBlockRecord).where(VWEDBlockRecord.id == block_record_id)
result = await session.execute(stmt_select)
block_record = result.scalar_one_or_none()
if not block_record:
logger.warning(f"未找到块记录: {block_record_id}")
return
# 使用传入的块名称或从记录中获取
actual_block_name = block_name or block_record.block_name
# 构建输出参数
update_values = {
"status": TaskBlockRecordStatus.SUCCESS,
"ended_reason": "选择机器人成功",
"remark": "选择机器人成功"
}
# 如果提供了输出数据,立即更新输出参数
if output_data:
# 构建完整输出结构 {"blocks": {"块名称": 输出内容}}
full_output = {TaskInputParamVariables.BLOCKS: {actual_block_name: output_data}}
output_full_json = json.dumps(full_output, ensure_ascii=False)
output_value_json = json.dumps(output_data, ensure_ascii=False)
update_values["output_params"] = output_full_json
update_values["block_out_params_value"] = output_value_json
logger.info(f"立即更新块 {actual_block_name} 的输出参数: {output_data}")
# 更新块记录
stmt = update(VWEDBlockRecord).where(VWEDBlockRecord.id == block_record_id).values(**update_values)
await session.execute(stmt)
await session.commit()
logger.info(f"成功更新块记录 {block_record_id} 的状态和输出参数")
except Exception as e:
logger.error(f"更新块记录 {block_record_id} 时发生错误: {str(e)}")
@ -840,20 +877,29 @@ class SelectAgvBlockHandler(RobotBlockHandler):
}
await self._record_task_log(block, result, context)
return result
# 获取当前块ID和名称提前获取以便更新块记录
current_block_id = block.get("id", "unknown")
current_block_name = block.get("name", f"b{current_block_id}")
# 构建输出数据
output_data = {
"selectedAgvId": amr_name,
}
results = {
"success": True,
"message": f"选择机器人块成功, 块id{current_block_name}",
"output": {
"selectedAgvId": amr_name,
}
"output": output_data
}
await self._record_task_log(block, results, context)
# 更新块记录状态为成功
await self.update_block_record(context.block_record_id, amr_name)
# 获取当前块ID和名称
current_block_id = block.get("id", "unknown")
current_block_name = block.get("name", f"b{current_block_id}")
# 立即更新块记录状态为成功,并更新输出参数
await self.update_block_record(
block_record_id=context.block_record_id,
agv_id=amr_name,
block_name=current_block_name,
output_data=output_data
)
# 更新任务记录中的agv_id字段
await self._update_task_record_agv_id(context.task_record_id, amr_name)
@ -926,7 +972,7 @@ class SelectAgvBlockHandler(RobotBlockHandler):
if "子块" not in result["message"]:
result["message"] = f"{result['message']},子块执行成功, 块id{current_block_name}"
else:
# 子块执行失败,根据失败的子块更新消息
# 子块执行失败,但选择机器人本身是成功的
logger.error(f"选择机器人块 {current_block_name} 的子块执行失败: {loop_result.get('message')}")
# 创建包含子块失败信息的结果
@ -935,6 +981,8 @@ class SelectAgvBlockHandler(RobotBlockHandler):
result = {
"success": False,
"self_success": True, # 标记:自身执行成功,但子块失败
"children_failed": True, # 标记:子块执行失败
"message": f"选择执行机器人成功 选择小车:{amr_name},但子块执行失败: {error_msg}失败块ID: {failed_block_id}",
"output": {
"selectedAgvId": amr_name,

View File

@ -20,6 +20,7 @@ from config.settings import settings
logger = get_logger("services.execution.handlers.storage_queue_manager")
class RequestPriority(Enum):
"""请求优先级"""
LOW = 1
@ -27,6 +28,7 @@ class RequestPriority(Enum):
HIGH = 3
URGENT = 4
class RequestStatus(Enum):
"""请求状态"""
PENDING = "pending"
@ -36,6 +38,7 @@ class RequestStatus(Enum):
TIMEOUT = "timeout"
CANCELLED = "cancelled"
@dataclass
class StorageRequest:
"""库位请求"""
@ -47,46 +50,49 @@ class StorageRequest:
task_record_id: str
priority: RequestPriority = RequestPriority.NORMAL
created_at: float = field(default_factory=time.time)
timeout: float = field(default_factory=lambda: settings.STORAGE_QUEUE_DEFAULT_TIMEOUT if settings.STORAGE_QUEUE_ENABLE_TIMEOUT else float('inf'))
timeout: float = field(
default_factory=lambda: settings.STORAGE_QUEUE_DEFAULT_TIMEOUT if settings.STORAGE_QUEUE_ENABLE_TIMEOUT else float(
'inf'))
retry_count: int = 0
max_retries: int = 3
status: RequestStatus = RequestStatus.PENDING
result: Optional[Dict[str, Any]] = None
error_message: Optional[str] = None
def __lt__(self, other):
    """Heap ordering: higher priority first; ties broken FIFO by creation time."""
    if self.priority.value == other.priority.value:
        return self.created_at < other.created_at
    return self.priority.value > other.priority.value
class StorageQueueManager:
"""库位请求队列管理器"""
def __init__(self, max_workers: int = settings.STORAGE_QUEUE_MAX_WORKERS, max_queue_size: int = None):
self.max_workers = max_workers or settings.STORAGE_QUEUE_MAX_WORKERS
self.max_queue_size = max_queue_size or settings.STORAGE_QUEUE_MAX_SIZE
self.enable_timeout = settings.STORAGE_QUEUE_ENABLE_TIMEOUT
self.cleanup_interval = settings.STORAGE_QUEUE_CLEANUP_INTERVAL
self.completed_request_ttl = settings.STORAGE_QUEUE_COMPLETED_REQUEST_TTL
# 优先级队列
self.request_queue: List[StorageRequest] = []
self.queue_lock = asyncio.Lock()
# 请求跟踪
self.pending_requests: Dict[str, StorageRequest] = {}
self.processing_requests: Dict[str, StorageRequest] = {}
self.completed_requests: Dict[str, StorageRequest] = {}
# 工作者管理
self.workers: List[asyncio.Task] = []
self.worker_semaphore = asyncio.Semaphore(max_workers)
self.shutdown_event = asyncio.Event()
# 处理器注册
self.handlers: Dict[str, Callable] = {}
# 统计信息
self.stats = {
'requests_total': 0,
@ -97,35 +103,35 @@ class StorageQueueManager:
'queue_size': 0,
'active_workers': 0
}
# 启动清理任务
self.cleanup_task = None
async def start(self):
    """Start the queue manager: spawn worker coroutines and the cleanup loop."""
    logger.info("启动库位请求队列管理器")
    # Spawn one worker task per configured slot.
    self.workers.extend(
        asyncio.create_task(self._worker(f"worker-{idx}"))
        for idx in range(self.max_workers)
    )
    # Background task that garbage-collects stale completed requests.
    self.cleanup_task = asyncio.create_task(self._cleanup_completed_requests())
    logger.info(f"队列管理器已启动,工作者数量: {self.max_workers}")
async def stop(self):
"""停止队列管理器"""
logger.info("停止库位请求队列管理器")
# 设置停止信号
self.shutdown_event.set()
# 等待所有工作者完成
if self.workers:
await asyncio.gather(*self.workers, return_exceptions=True)
# 停止清理任务
if self.cleanup_task:
self.cleanup_task.cancel()
@ -133,31 +139,31 @@ class StorageQueueManager:
await self.cleanup_task
except asyncio.CancelledError:
pass
logger.info("队列管理器已停止")
def register_handler(self, handler_type: str, handler_func: Callable) -> None:
    """Register a handler function under the given type key.

    Args:
        handler_type: Key that incoming requests use to select this handler.
        handler_func: Callable invoked to process matching requests.
    """
    self.handlers[handler_type] = handler_func
    logger.info(f"注册处理器: {handler_type}")
async def submit_request(self, request: StorageRequest) -> str:
    """
    Submit a request to the priority queue.

    Args:
        request: The storage request to enqueue.

    Returns:
        The request id of the enqueued request.

    Raises:
        Exception: If the queue is already at its maximum size.
    """
    async with self.queue_lock:
        # Fix: the capacity check must happen under the lock. Checking it
        # before acquiring the lock (as before) lets concurrent submitters
        # race past the limit (time-of-check/time-of-use).
        if len(self.request_queue) >= self.max_queue_size:
            raise Exception("队列已满,无法提交新请求")
        # Add to the priority heap and the pending tracker.
        heapq.heappush(self.request_queue, request)
        self.pending_requests[request.request_id] = request
        # Update statistics.
        self.stats['requests_total'] += 1
        self.stats['queue_size'] = len(self.request_queue)
        logger.debug(f"提交请求到队列: {request.request_id}, 优先级: {request.priority.name}")
    return request.request_id
async def get_request_status(self, request_id: str) -> Optional[Dict[str, Any]]:
"""获取请求状态"""
# 检查各个状态的请求
@ -187,13 +193,13 @@ class StorageQueueManager:
"error_message": request.error_message,
"processing_time": time.time() - request.created_at
}
return None
async def wait_for_result(self, request_id: str, timeout: float = None) -> Dict[str, Any]:
"""等待请求结果"""
start_time = time.time()
# 根据配置决定是否使用超时
if not self.enable_timeout:
# 禁用超时,无限等待
@ -203,64 +209,79 @@ class StorageQueueManager:
# 启用超时,使用提供的超时时间或默认值
check_timeout = timeout or settings.STORAGE_QUEUE_DEFAULT_TIMEOUT
logger.debug(f"等待请求结果(超时 {check_timeout}s: {request_id}")
while True:
# 检查是否完成
if request_id in self.completed_requests:
request = self.completed_requests[request_id]
if request.status == RequestStatus.COMPLETED:
return request.result
else:
raise Exception(f"请求失败: {request.error_message}")
# 只有在启用超时时才检查超时
if self.enable_timeout:
# 检查等待超时
if time.time() - start_time >= check_timeout:
await self._mark_request_timeout(request_id)
raise Exception("等待结果超时")
# 检查请求本身是否超时
if request_id in self.pending_requests:
request = self.pending_requests[request_id]
if request.timeout != float('inf') and time.time() - request.created_at > request.timeout:
try:
while True:
# 检查是否完成
if request_id in self.completed_requests:
request = self.completed_requests[request_id]
if request.status == RequestStatus.COMPLETED:
result = request.result
# 立即清理已完成的请求,避免内存积累
del self.completed_requests[request_id]
logger.debug(f"请求结果已取走并清理: {request_id}")
return result
else:
error_msg = request.error_message
# 失败的请求也立即清理
del self.completed_requests[request_id]
logger.debug(f"请求失败结果已取走并清理: {request_id}")
raise Exception(f"请求失败: {error_msg}")
# 只有在启用超时时才检查超时
if self.enable_timeout:
# 检查等待超时
if time.time() - start_time >= check_timeout:
await self._mark_request_timeout(request_id)
raise Exception("请求超时")
await asyncio.sleep(0.1) # 避免忙等待
raise Exception("等待结果超时")
# 检查请求本身是否超时
if request_id in self.pending_requests:
request = self.pending_requests[request_id]
if request.timeout != float('inf') and time.time() - request.created_at > request.timeout:
await self._mark_request_timeout(request_id)
raise Exception("请求超时")
await asyncio.sleep(0.1) # 避免忙等待
except Exception:
# 发生异常时,也要尝试清理可能存在的已完成请求
if request_id in self.completed_requests:
del self.completed_requests[request_id]
logger.debug(f"异常时清理请求: {request_id}")
raise
async def cancel_request(self, request_id: str) -> bool:
    """
    Cancel a pending request.

    Only requests still waiting in the queue can be cancelled; requests
    already being processed are left alone.

    Returns:
        True if the request was found and cancelled, False otherwise.
    """
    async with self.queue_lock:
        request = self.pending_requests.get(request_id)
        if request is None:
            return False
        request.status = RequestStatus.CANCELLED
        # Rebuild the heap without the cancelled entry.
        self.request_queue = [r for r in self.request_queue if r.request_id != request_id]
        heapq.heapify(self.request_queue)
        # Move the request from pending to completed bookkeeping.
        del self.pending_requests[request_id]
        self.completed_requests[request_id] = request
        self.stats['queue_size'] = len(self.request_queue)
        logger.info(f"取消请求: {request_id}")
        return True
def get_queue_stats(self) -> Dict[str, Any]:
    """Return a snapshot of queue statistics (a copy; safe for callers to mutate)."""
    self.stats['queue_size'] = len(self.request_queue)
    active = sum(1 for worker in self.workers if not worker.done())
    self.stats['active_workers'] = active
    return dict(self.stats)
async def _worker(self, worker_name: str):
"""工作者协程"""
logger.info(f"工作者 {worker_name} 启动")
while not self.shutdown_event.is_set():
try:
# 获取请求
@ -268,104 +289,108 @@ class StorageQueueManager:
if not request:
await asyncio.sleep(0.1)
continue
# 处理请求
async with self.worker_semaphore:
await self._process_request(request, worker_name)
except Exception as e:
logger.error(f"工作者 {worker_name} 处理请求异常: {str(e)}")
await asyncio.sleep(1)
logger.info(f"工作者 {worker_name} 停止")
async def _get_next_request(self) -> Optional[StorageRequest]:
"""获取下一个请求"""
async with self.queue_lock:
while self.request_queue:
request = heapq.heappop(self.request_queue)
# 检查请求是否仍然有效
if request.request_id in self.pending_requests:
# 只有在启用超时且请求设置了有限超时时才检查超时
if (self.enable_timeout and
request.timeout != float('inf') and
time.time() - request.created_at > request.timeout):
if (self.enable_timeout and
request.timeout != float('inf') and
time.time() - request.created_at > request.timeout):
await self._mark_request_timeout(request.request_id)
continue
# 移动到处理队列
del self.pending_requests[request.request_id]
self.processing_requests[request.request_id] = request
request.status = RequestStatus.PROCESSING
self.stats['queue_size'] = len(self.request_queue)
return request
return None
async def _process_request(self, request: StorageRequest, worker_name: str):
"""处理单个请求"""
start_time = time.time()
logger.debug(f"工作者 {worker_name} 开始处理请求: {request.request_id}")
try:
# 获取处理器
handler = self.handlers.get(request.handler_type)
if not handler:
raise Exception(f"未找到处理器: {request.handler_type}")
# 执行处理
result = await handler(request.input_params, request.context_data, request.map_id, request.task_record_id)
# 标记完成
await self._mark_request_completed(request.request_id, result)
processing_time = time.time() - start_time
logger.debug(f"工作者 {worker_name} 完成请求: {request.request_id}, 用时: {processing_time:.3f}s")
except Exception as e:
# 处理失败
error_msg = str(e)
logger.error(f"工作者 {worker_name} 处理请求失败: {request.request_id}, 错误: {error_msg}")
# 检查是否需要重试
if request.retry_count < request.max_retries:
await self._retry_request(request)
else:
await self._mark_request_failed(request.request_id, error_msg)
finally:
# 更新平均处理时间
# 更新平均处理时间(只在这里统计,移除 _mark_request_completed 中的重复计数)
processing_time = time.time() - start_time
total_time = self.stats['avg_processing_time'] * self.stats['requests_completed']
self.stats['requests_completed'] += 1
self.stats['avg_processing_time'] = (total_time + processing_time) / self.stats['requests_completed']
if self.stats['requests_completed'] > 0:
total_time = self.stats['avg_processing_time'] * self.stats['requests_completed']
self.stats['avg_processing_time'] = (total_time + processing_time) / (
self.stats['requests_completed'] + 1)
else:
self.stats['avg_processing_time'] = processing_time
async def _mark_request_completed(self, request_id: str, result: Dict[str, Any]):
    """Move a processing request to the completed map with its result."""
    request = self.processing_requests.pop(request_id, None)
    if request is None:
        return
    request.status = RequestStatus.COMPLETED
    request.result = result
    self.completed_requests[request_id] = request
    # Completion counter is bumped here only, so the timing bookkeeping
    # in _process_request does not double-count.
    self.stats['requests_completed'] += 1
async def _mark_request_failed(self, request_id: str, error_message: str):
    """Move a processing request to the completed map, marked as failed."""
    request = self.processing_requests.pop(request_id, None)
    if request is None:
        return
    request.status = RequestStatus.FAILED
    request.error_message = error_message
    self.completed_requests[request_id] = request
    self.stats['requests_failed'] += 1
async def _mark_request_timeout(self, request_id: str):
"""标记请求超时"""
request = None
@ -375,58 +400,63 @@ class StorageQueueManager:
elif request_id in self.processing_requests:
request = self.processing_requests[request_id]
del self.processing_requests[request_id]
if request:
request.status = RequestStatus.TIMEOUT
request.error_message = "请求超时"
self.completed_requests[request_id] = request
self.stats['requests_timeout'] += 1
async def _retry_request(self, request: StorageRequest):
    """Re-queue a failed request for another attempt and bump its retry count."""
    rid = request.request_id
    request.retry_count += 1
    request.status = RequestStatus.PENDING
    async with self.queue_lock:
        # Move the request back from processing to the pending queue.
        heapq.heappush(self.request_queue, request)
        del self.processing_requests[rid]
        self.pending_requests[rid] = request
        self.stats['queue_size'] = len(self.request_queue)
    logger.info(f"重试请求: {rid}, 第 {request.retry_count} 次重试")
def _get_queue_position(self, request_id: str) -> int:
    """Return the 1-based position of a request in the queue, or -1 if absent."""
    return next(
        (pos for pos, queued in enumerate(self.request_queue, start=1)
         if queued.request_id == request_id),
        -1,
    )
async def _cleanup_completed_requests(self):
"""清理已完成的请求"""
"""
清理已完成的请求兜底机制
注意采用立即清理模式后此方法主要作为兜底保护防止异常情况下的内存泄漏
"""
while not self.shutdown_event.is_set():
try:
await asyncio.sleep(self.cleanup_interval) # 使用配置的清理间隔
current_time = time.time()
cleanup_threshold = self.completed_request_ttl # 使用配置的保留时间
to_remove = []
for request_id, request in self.completed_requests.items():
if current_time - request.created_at > cleanup_threshold:
to_remove.append(request_id)
for request_id in to_remove:
del self.completed_requests[request_id]
if to_remove:
logger.info(f"清理了 {len(to_remove)} 个已完成的请求")
logger.info(
f"兜底清理了 {len(to_remove)} 个未被取走的已完成请求(可能存在未调用 wait_for_result 的情况)")
except Exception as e:
logger.error(f"清理已完成请求异常: {str(e)}")
# 全局队列管理器实例
storage_queue_manager = StorageQueueManager()

View File

@ -61,7 +61,8 @@ class TaskContext:
self.map_id = map_id # 地图ID
self.parent_log_id = None # 当前父日志ID(用于建立层级关系)
self.current_iteration_index = None # 当前迭代索引
self._is_cleaned = False # 标记是否已清理,防止重复清理
def set_current_block(self, block_id: str, block_name: str):
"""
设置当前正在执行的块
@ -396,28 +397,36 @@ class TaskContext:
def cleanup(self) -> None:
"""
清理上下文数据释放内存
用于任务执行完成后释放大型数据结构防止内存泄漏
修复内存泄漏: 任务完成后及时清理大型数据结构
优化彻底置空所有引用而不是仅清空容器
"""
# # 防止重复清理
if self._is_cleaned:
return
try:
# 清理大型字典
if self.variables:
self.variables.clear()
if self.variable_sources:
self.variable_sources.clear()
if self.block_outputs:
self.block_outputs.clear()
if self.outputs:
self.outputs.clear()
# 清理列表
if self.execution_path:
self.execution_path.clear()
# 清空引用
self.input_params = {}
# 彻底清空所有字典和列表(直接置 None而不是 clear()
self.variables = None
self.variable_sources = None
self.block_outputs = None
self.outputs = None
self.execution_path = None
self.input_params = None
self.error = None
logger.debug(f"任务上下文 {self.task_record_id} 已清理")
# 清理其他可能的大对象引用
self.token = None
self.map_id = None
self.block_record_id = None
self.skip_to_component_id = None
self.failure_reason = None
# 清理 ID 和名称引用
self.current_block_id = None
self.current_block_name = None
self._is_cleaned = True
logger.debug(f"任务上下文 {self.task_record_id} 已彻底清理")
except Exception as e:
logger.error(f"清理任务上下文失败: {str(e)}")

View File

@ -38,7 +38,7 @@ class TaskExecutor:
def __init__(self, task_record_id: str):
"""
初始化任务执行器
Args:
task_record_id: 任务记录ID
"""
@ -52,6 +52,7 @@ class TaskExecutor:
self.timeout = 3600*10 # 默认超时时间10小时
self.is_canceled = False
self.is_error = False
self._is_cleaned = False # 防止重复清理标志
def set_timeout(self, timeout_seconds: int) -> None:
"""
@ -215,7 +216,7 @@ class TaskExecutor:
task_detail = json.loads(task_detail_str)
root_block = task_detail.get("rootBlock", {})
release_sites = self.task_def.release_sites
print("root_block:::::::::::", root_block)
# print("root_block:::::::::::", root_block)
# 更新任务状态为执行中
async with get_async_session() as session:
await self._update_task_status(session, TaskStatus.RUNNING, "任务执行中", task_detail=task_detail_str)
@ -384,22 +385,9 @@ class TaskExecutor:
finally:
self.is_running = False
# 清理任务执行器,释放内存
# 统一调用 cleanup() 方法清理所有资源
try:
if self.task_context:
if hasattr(self.task_context, 'cleanup'):
self.task_context.cleanup()
self.task_context = None
if self.block_executor:
self.block_executor.task_context = None
self.block_executor = None
# 清空任务记录引用
self.task_record = None
self.task_def = None
logger.debug(f"任务执行器 {self.task_record_id} 已清理")
self.cleanup()
except Exception as cleanup_error:
logger.error(f"清理任务执行器失败: {str(cleanup_error)}")
@ -624,4 +612,51 @@ class TaskExecutor:
except Exception as e:
logger.error(f"外部API同步失败: {str(e)}")
# 外部API失败不应该影响主任务流程只记录日志
# 外部API失败不应该影响主任务流程只记录日志
def cleanup(self) -> None:
    """
    Tear down this TaskExecutor and everything it owns.

    Cleanup order matters: ``block_executor`` is released before
    ``task_context`` because the executor holds a reference to the
    context. Idempotent — repeated calls are no-ops.
    """
    if self._is_cleaned:
        return
    try:
        # 1) Block executor first (it references the task context).
        executor, self.block_executor = self.block_executor, None
        if executor is not None:
            try:
                if hasattr(executor, 'cleanup'):
                    executor.cleanup()
            except Exception as e:
                logger.error(f"清理 block_executor 失败: {str(e)}")
        # 2) Then the task context itself.
        context, self.task_context = self.task_context, None
        if context is not None:
            try:
                if hasattr(context, 'cleanup'):
                    context.cleanup()
            except Exception as e:
                logger.error(f"清理 task_context 失败: {str(e)}")
        # 3) Drop remaining references and reset state flags.
        self.task_record = None
        self.task_def = None
        self.error_message = None
        self.is_running = False
        self.is_canceled = False
        self.is_error = False
        # Mark cleaned so subsequent calls short-circuit.
        self._is_cleaned = True
        logger.debug(f"TaskExecutor {self.task_record_id} 已彻底清理")
    except Exception as e:
        logger.error(f"清理 TaskExecutor 失败: {str(e)}")

View File

@ -727,10 +727,9 @@ async def set_task_failed(task_id: str, token: str = None) -> Optional[ApiRespon
# 构建请求头
headers = {}
headers[TFApiConfig.TOKEN_HEADER] = token
headers["x-tenant-id"] = "1000"
# headers["x-tenant-id"] = "1000"
try:
logger.info(f"正在设置任务状态为已失败: {task_id}")
timeout = aiohttp.ClientTimeout(total=TFApiConfig.TIMEOUT, connect=5)
async with aiohttp.ClientSession(timeout=timeout, trust_env=False) as session:
async with session.put(