目录
流程
代码
分析
获取正在运行的进程的信息
检查HMI 的模块信息
检查被监控的组件
检查其他组件
判断进程状态UpdateStatus
流程
代码
// Recurrent runner that checks whether the processes described by the current
// HMI mode's process-monitor configs appear among the running processes.
class ProcessMonitor : public RecurrentRunner {
public:
ProcessMonitor();
// Performs one monitoring pass; overrides the RecurrentRunner hook.
void RunOnce(const double current_time) override;
private:
// Updates `status` according to whether any entry of `running_processes`
// contains every keyword listed in `config.command_keywords()`.
static void UpdateStatus(
const std::vector<std::string>& running_processes,
const apollo::dreamview::ProcessMonitorConfig& config,
ComponentStatus* status);
};
// Runs one monitoring pass: snapshots the command lines found under
// /proc/*/cmdline, then refreshes the process status of every HMI module,
// monitored component and other component of the current HMI mode.
// `current_time` is not read by this function.
void ProcessMonitor::RunOnce(const double current_time) {
  // Snapshot the command line of every process listed under /proc.
  std::vector<std::string> running_processes;
  for (const auto& cmdline_path : cyber::common::Glob("/proc/*/cmdline")) {
    std::string cmdline;
    // Skip entries for which GetContent returns false or that are empty.
    if (!cyber::common::GetContent(cmdline_path, &cmdline) ||
        cmdline.empty()) {
      continue;
    }
    // The parts of /proc/<PID>/cmdline are separated with \0; convert the
    // separators back to whitespaces before storing the command line.
    std::replace(cmdline.begin(), cmdline.end(), '\0', ' ');
    running_processes.push_back(cmdline);
  }

  auto monitor_manager = MonitorManager::Instance();
  const auto& hmi_mode = monitor_manager->GetHMIMode();

  // HMI modules: one status entry per module of the current mode.
  // NOTE(review): at() presumes the status map already holds an entry for
  // every module name - confirm where the map is populated.
  auto* module_statuses = monitor_manager->GetStatus()->mutable_hmi_modules();
  for (const auto& module_entry : hmi_mode.modules()) {
    const std::string& module_name = module_entry.first;
    const auto& monitor_config = module_entry.second.process_monitor_config();
    auto& module_status = module_statuses->at(module_name);
    UpdateStatus(running_processes, monitor_config, &module_status);
  }

  // Monitored components: only those that declare a process config and
  // already have a status entry are checked.
  auto* component_statuses =
      monitor_manager->GetStatus()->mutable_components();
  for (const auto& component_entry : hmi_mode.monitored_components()) {
    const std::string& component_name = component_entry.first;
    if (!component_entry.second.has_process() ||
        !apollo::common::util::ContainsKey(*component_statuses,
                                           component_name)) {
      continue;
    }
    const auto& process_config = component_entry.second.process();
    auto* process_status =
        component_statuses->at(component_name).mutable_process_status();
    UpdateStatus(running_processes, process_config, process_status);
  }

  // Other components: the mapped value is used directly as the config.
  // NOTE(review): same at() presumption as for the HMI modules above.
  auto* other_statuses =
      monitor_manager->GetStatus()->mutable_other_components();
  for (const auto& other_entry : hmi_mode.other_components()) {
    const std::string& other_name = other_entry.first;
    const auto& other_config = other_entry.second;
    auto& other_status = other_statuses->at(other_name);
    UpdateStatus(running_processes, other_config, &other_status);
  }
}
分析
核心的处理逻辑都在 RunOnce 中
获取正在运行的进程的信息
// Get running processes.
// One string is stored per /proc/*/cmdline file whose content is non-empty.
std::vector<std::string> running_processes;
for (const auto& cmd_file : cyber::common::Glob("/proc/*/cmdline")) {
// Get process command string.
std::string cmd_string;
// Entries for which GetContent returns false, or whose content is empty,
// are skipped.
if (cyber::common::GetContent(cmd_file, &cmd_string) &&
!cmd_string.empty()) {
// In /proc/<PID>/cmdline, the parts are separated with \0, which will be
// converted back to whitespaces here.
std::replace(cmd_string.begin(), cmd_string.end(), '\0', ' ');
running_processes.push_back(cmd_string);
}
}
首先遍历所有的 /proc/*/cmdline 文件。该文件保存了对应进程启动时的完整命令行,路径中的 * 就是进程的 PID。关于该文件的具体内容可以参考:https://blog.csdn.net/whatday/article/details/108897457
从该文件中可以获取进程的命令行参数,包括进程的启动路径(argv[0]);各个参数之间以 \0 分隔,代码中将 \0 替换为空格后存入 running_processes。
检查HMI 的模块信息
// Check HMI modules.
// Each module of the current HMI mode is matched against the running
// processes using its process_monitor_config; the result is written into the
// module's entry of the hmi_modules status map.
// NOTE(review): at() presumes an entry for every module name already exists
// in the status map - confirm where it is populated.
auto* hmi_modules = manager->GetStatus()->mutable_hmi_modules();
for (const auto& iter : mode.modules()) {
const std::string& module_name = iter.first;
const auto& config = iter.second.process_monitor_config();
UpdateStatus(running_processes, config, &hmi_modules->at(module_name));
}
- 获取当前 HMI 模式中配置的所有 module;
- 将系统中检查到的正在运行的进程与每个 module 配置的进程关键字进行对比,并把最终的检查结果写入 hmi_modules->at(module_name) 对应的状态中
检查被监控的组件
// Check monitored components.
// Only components that declare a process config (has_process) and already
// have an entry in the components status map are checked.
auto* components = manager->GetStatus()->mutable_components();
for (const auto& iter : mode.monitored_components()) {
const std::string& name = iter.first;
if (iter.second.has_process() &&
apollo::common::util::ContainsKey(*components, name)) {
const auto& config = iter.second.process();
// The result is written into the component's process_status sub-message.
auto* status = components->at(name).mutable_process_status();
UpdateStatus(running_processes, config, status);
}
}
- 首先获取所有组件的状态表(components);
- 遍历当前模式中要监控的组件,只处理配置了 process 且在状态表中已有记录的组件,并取出其进程的 config 信息;
- 将 config 和该组件的 process_status 交给 UpdateStatus 进行最终的判断;
检查其他组件
// Check other components.
// Here the mapped value of each other_components entry is used directly as
// the process-monitor config.
// NOTE(review): at() presumes an entry for every name already exists in the
// status map - confirm where it is populated.
auto* other_components = manager->GetStatus()->mutable_other_components();
for (const auto& iter : mode.other_components()) {
const std::string& name = iter.first;
const auto& config = iter.second;
UpdateStatus(running_processes, config, &other_components->at(name));
}
// Closing brace of ProcessMonitor::RunOnce, carried over from the full
// listing.
}
逻辑同上
判断进程状态UpdateStatus
// Decides whether the process described by `config` is running.
//
// `status` is cleared first. The first entry of `running_processes` that
// contains every keyword of `config.command_keywords()` as a substring is
// reported through SummaryMonitor::EscalateStatus with ComponentStatus::OK and
// that command line as the message. If no entry qualifies, ComponentStatus::
// FATAL is reported with an empty message.
void ProcessMonitor::UpdateStatus(
    const std::vector<std::string>& running_processes,
    const apollo::dreamview::ProcessMonitorConfig& config,
    ComponentStatus* status) {
  status->clear_status();

  const auto& keywords = config.command_keywords();
  // True when `cmdline` contains each configured keyword; an empty keyword
  // list therefore matches any command line.
  const auto has_every_keyword = [&keywords](const std::string& cmdline) {
    return std::all_of(keywords.begin(), keywords.end(),
                       [&cmdline](const std::string& word) {
                         return cmdline.find(word) != std::string::npos;
                       });
  };

  const auto matched = std::find_if(running_processes.begin(),
                                    running_processes.end(),
                                    has_every_keyword);
  if (matched == running_processes.end()) {
    // No running process matched all keywords.
    SummaryMonitor::EscalateStatus(ComponentStatus::FATAL, "", status);
    return;
  }
  // Process command keywords are all matched. The process is running.
  SummaryMonitor::EscalateStatus(ComponentStatus::OK, *matched, status);
}
这就是整个进程监控的核心判断函数了。
- 先清空 status 中已有的状态,然后遍历之前从 /proc/*/cmdline 中获取到的 running_processes;
- 对每一条命令行遍历 config 中的 command_keywords,看 running_processes 中是否存在一条命令行(包含程序启动路径和命令行参数)能够匹配上全部关键字;
- 如果找到,就通过 EscalateStatus 上报 OK 并附带匹配到的命令行;如果全部找不到,就上报 FATAL