All files / src/utils text-sanitiser.js

100% Statements 33/33
100% Branches 18/18
100% Functions 5/5
100% Lines 26/26

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85                          393x   390x 390x 390x 153685x 60x 60x   153625x 59x 59x   153566x   390x                         386x 384x                 381x 380x                   395x 377x 377x 377x 377x                   182x 182x   168x 163x     83x  
/**
 * Text sanitisation utilities for user-generated content.
 *
 * Strips HTML tags, zero-width characters, null bytes, and other
 * potentially dangerous or invisible content from user input.
 */
 
/**
 * Remove every HTML tag from a string.
 *
 * Anything from a `<` up to and including the next `>` is discarded; if the
 * `<` is never closed, the remainder of the input is discarded too. A `>`
 * that appears outside a tag is also removed.
 *
 * @param {string} text - Raw input; non-string values yield ''.
 * @returns {string} The input with tags and angle brackets removed.
 */
function stripHtml(text) {
  if (typeof text !== 'string') return '';
  // Plain index scanning instead of a regex, so there is no backtracking risk.
  const kept = [];
  let cursor = 0;
  while (cursor < text.length) {
    const open = text.indexOf('<', cursor);
    const visible = open === -1 ? text.slice(cursor) : text.slice(cursor, open);
    // Stray '>' characters outside a tag are dropped as well.
    kept.push(visible.split('>').join(''));
    if (open === -1) break;
    const close = text.indexOf('>', open + 1);
    // Unterminated tag: everything after the '<' is treated as tag content.
    if (close === -1) break;
    cursor = close + 1;
  }
  return kept.join('');
}
 
/**
 * Strip zero-width and invisible formatting characters.
 *
 * Removed:
 *   - U+200B zero-width space
 *   - U+200C zero-width non-joiner
 *   - U+2063 invisible separator
 *   - U+FEFF byte order mark / zero-width no-break space
 *   - U+202A–U+202E bidi embeddings and overrides (LRE, RLE, PDF, LRO, RLO)
 *   - U+2066–U+2069 bidi isolates (LRI, RLI, FSI, PDI)
 *
 * The whole family of bidi embedding/override/isolate controls is removed,
 * not just U+202E, because any of them can visually reorder text
 * ("Trojan Source" style spoofing).
 *
 * Not removed: U+200D zero-width joiner (emoji ZWJ sequences depend on it)
 * and the directional marks U+200E / U+200F.
 *
 * @param {string} text - Raw input; non-string values yield ''.
 * @returns {string} The input without the characters listed above.
 */
function stripZeroWidth(text) {
  if (typeof text !== 'string') return '';
  return text.replace(/[\u200B\u200C\u2063\uFEFF\u202A-\u202E\u2066-\u2069]/g, '');
}
 
/**
 * Remove every NUL (U+0000) character from a string.
 *
 * @param {string} text - Raw input; non-string values yield ''.
 * @returns {string} The input with all NUL characters deleted.
 */
function stripNullBytes(text) {
  // Splitting on NUL and re-joining drops each occurrence.
  return typeof text === 'string' ? text.split('\0').join('') : '';
}
 
/**
 * Run the complete clean-up pipeline on user-supplied text.
 *
 * Stages are applied in order: HTML tag removal, zero-width character
 * removal, null byte removal. Surrounding whitespace is trimmed last.
 *
 * @param {string} text - Raw input; non-string values yield ''.
 * @returns {string} The cleaned, trimmed text.
 */
function sanitise(text) {
  if (typeof text !== 'string') return '';
  const stages = [stripHtml, stripZeroWidth, stripNullBytes];
  const cleaned = stages.reduce((current, stage) => stage(current), text);
  return cleaned.trim();
}
 
/**
 * Clean a suggestion title and validate it.
 *
 * The text is passed through `sanitise`; the result is accepted only if it
 * is non-empty and contains at least one Unicode letter from any script.
 *
 * @param {string} text - Raw title input.
 * @returns {string|null} The sanitised title, or null when it is invalid.
 */
function sanitiseTitle(text) {
  const candidate = sanitise(text);
  // \p{L} matches a letter in any script, so non-Latin titles are accepted.
  const hasLetter = candidate !== '' && /[\p{L}]/u.test(candidate);
  return hasLetter ? candidate : null;
}
 
// Public API: the three individual strippers plus the combined pipelines.
module.exports = { stripHtml, stripZeroWidth, stripNullBytes, sanitise, sanitiseTitle };