All files / src/cron serverHealth.js

100% Statements 37/37
100% Branches 20/20
100% Functions 4/4
100% Lines 36/36

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99            1x 1x 1x     1x         23x 23x 23x 23x 23x   23x 23x   23x 3x                           23x 16x 16x 16x 15x 2x 2x   13x 13x 12x 15x 14x 14x 14x   14x 3x                         1x           14x       1x   13x         1x       23x           1x  
/**
 * Server health check cron — monitors memory usage and PM2 restarts.
 *
 * Runs every 5 minutes. Creates alerts when thresholds are exceeded.
 */
 
const { execFile } = require('node:child_process');
const os = require('node:os');
const log = require('../utils/log');
 
// Track last-known restart counts to only alert on NEW restarts.
// Keyed by PM2 process name; each value is the restart count seen on the
// previous serverHealth run. Held in module memory, so it is shared by every
// call to serverHealth within this Node process.
const lastRestartCounts = {};
 
/**
 * Runs `pm2 jlist` and resolves with its raw stdout.
 *
 * Rejects with the error reported by `execFile` (for example when the binary
 * is missing or the 10 s timeout fires) so the caller can log the failure
 * instead of silently skipping the check.
 *
 * @returns {Promise<string>} Raw stdout of `pm2 jlist`.
 */
function readPm2ProcessList() {
  return new Promise((resolve, reject) => {
    execFile('pm2', ['jlist'], { timeout: 10000 }, (err, stdout) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout);
    });
  });
}

/**
 * Creates a single `pm2_restart` alert. Never rejects: a failing alert is
 * logged so that one bad alert cannot abort the rest of the health check.
 *
 * @param {object} alertManager - Object exposing `createAlert(...)`.
 * @param {string} name - PM2 process name.
 * @param {number} restarts - Current total restart count of the process.
 * @param {number} newRestarts - Restarts observed since the previous check.
 * @returns {Promise<void>}
 */
async function createPm2RestartAlert(alertManager, name, restarts, newRestarts) {
  try {
    await alertManager.createAlert(
      'pm2_restart',
      'warning',
      `PM2 process restarted: ${name}`,
      `${newRestarts} new restart(s) (total: ${restarts})`,
      {
        processName: name,
        restartCount: restarts,
        newRestarts,
      },
    );
  } catch (alertErr) {
    log.error('server-health', 'Failed to create PM2 restart alert', {
      error: alertErr.message,
    });
  }
}

/**
 * Compares the restart count of every PM2 process against the count recorded
 * on the previous run and raises one alert per process whose count went up.
 *
 * The first time a process is seen only a baseline is recorded (no alert),
 * because restarts that happened before monitoring started are not "new".
 *
 * @param {object} alertManager - Object exposing `createAlert(...)`.
 * @returns {Promise<void>} Settles once every triggered alert has been attempted.
 * @throws {Error} When `pm2 jlist` itself could not be executed.
 */
async function checkPm2Restarts(alertManager) {
  const stdout = await readPm2ProcessList();
  if (!stdout) return;

  let processes;
  try {
    processes = JSON.parse(stdout);
  } catch {
    processes = null;
  }
  if (!Array.isArray(processes)) {
    // PM2 output parsing failed — logged as warning but non-fatal for health check
    log.warn('cron', 'serverHealth: failed to parse PM2 output');
    return;
  }

  const pendingAlerts = [];
  for (const proc of processes) {
    if (!proc?.pm2_env) continue;

    const name = proc.name;
    const restarts = proc.pm2_env.restart_time || 0;
    const lastKnown = lastRestartCounts[name];

    // Record the new baseline first so a failing alert is not re-raised forever.
    lastRestartCounts[name] = restarts;

    // `typeof` distinguishes "never seen" (undefined) from "seen with 0
    // restarts", so the very first restart of a process is reported too.
    // A lower count (e.g. after the counter was reset) just moves the baseline.
    if (typeof lastKnown !== 'number' || restarts <= lastKnown) continue;

    pendingAlerts.push(
      createPm2RestartAlert(alertManager, name, restarts, restarts - lastKnown),
    );
  }

  await Promise.all(pendingAlerts);
}

/**
 * Server health check: raises a `high_memory` alert when this process's RSS
 * exceeds the configured share of total system memory and, when
 * `pm2RestartAlert` is enabled in the config, a `pm2_restart` alert for every
 * PM2 process that restarted since the previous run.
 *
 * PM2 failures (binary unavailable, timeout, unparsable output) are logged
 * and never propagate; a rejection from the memory alert does.
 *
 * @param {object} alertManager - Must provide `getConfig()` (read for
 *   `serverMemoryWarningPercent` and `pm2RestartAlert`) and `createAlert(...)`.
 * @returns {Promise<void>}
 */
async function serverHealth(alertManager) {
  // Check memory usage — RSS vs system total (not V8 heap ratio, which is
  // misleadingly high because V8 keeps heapTotal close to heapUsed)
  const mem = process.memoryUsage();
  const systemTotalBytes = os.totalmem();
  const rssPercent = (mem.rss / systemTotalBytes) * 100;
  const rssMB = Math.round(mem.rss / 1024 / 1024);
  const systemTotalMB = Math.round(systemTotalBytes / 1024 / 1024);
  const roundedRssPercent = Math.round(rssPercent * 10) / 10;

  const config = alertManager.getConfig();
  // Any falsy value (unset, 0, '') falls back to the 30% default.
  const memThreshold = config.serverMemoryWarningPercent || 30;

  if (rssPercent > memThreshold) {
    await alertManager.createAlert(
      'high_memory',
      'warning',
      'High server memory usage',
      `RSS at ${rssMB}MB / ${systemTotalMB}MB (${rssPercent.toFixed(1)}%, threshold: ${memThreshold}%)`,
      {
        rssMB,
        systemTotalMB,
        rssPercent: roundedRssPercent,
      },
    );
  }

  // Check PM2 restart count — only alert on NEW restarts since last check
  if (config.pm2RestartAlert) {
    try {
      await checkPm2Restarts(alertManager);
    } catch (pm2Err) {
      // PM2 binary unavailable or exec failed — log and continue health check
      log.warn('cron', 'serverHealth: PM2 check failed', { error: pm2Err.message });
    }
  }

  log.debug('cron', 'serverHealth: check completed', {
    rssMB,
    rssPercent: roundedRssPercent,
  });
}
 
module.exports = serverHealth;