Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | 119x 83x 59x 59x 59x 53x 50x 3x 47x 1730x 1980x 47x 1683x 524084x 524084x 47x 54x 54x 54x 52x 49x 33x 33x 33x 33x 83x | /**
* Text similarity utilities for duplicate detection and blocked topic matching.
*
* Uses normalised Levenshtein distance to compute similarity ratio (0-1).
*/
/**
* Normalise text for comparison: lowercase, strip punctuation, collapse whitespace.
* @param {string} text
* @returns {string}
*/
function normalise(text) {
return (text || '')
.toLowerCase()
.replace(/[^\w\s\u00C0-\u024F\u0400-\u04FF\u0600-\u06FF\u3000-\u9FFF\uAC00-\uD7AF]/g, '')
.replace(/\s+/g, ' ')
.trim();
}
/**
* Compute Levenshtein edit distance between two strings.
* @param {string} a
* @param {string} b
* @returns {number}
*/
const MAX_EDIT_DISTANCE_LEN = 500;
function editDistance(a, b) {
// Coerce to strings to prevent type confusion if called with non-string input
a = typeof a === 'string' ? a : String(a ?? '');
b = typeof b === 'string' ? b : String(b ?? '');
if (a.length === 0) return b.length;
if (b.length === 0) return a.length;
// Prevent CPU exhaustion with very long strings (O(n*m) algorithm)
if (a.length > MAX_EDIT_DISTANCE_LEN || b.length > MAX_EDIT_DISTANCE_LEN) {
return a === b ? 0 : Math.max(a.length, b.length);
}
const matrix = [];
for (let i = 0; i <= b.length; i++) matrix[i] = [i];
for (let j = 0; j <= a.length; j++) matrix[0][j] = j;
for (let i = 1; i <= b.length; i++) {
for (let j = 1; j <= a.length; j++) {
const cost = b[i - 1] === a[j - 1] ? 0 : 1;
matrix[i][j] = Math.min(
matrix[i - 1][j] + 1,
matrix[i][j - 1] + 1,
matrix[i - 1][j - 1] + cost,
);
}
}
return matrix[b.length][a.length];
}
/**
* Compute similarity ratio between two strings (0 = completely different, 1 = identical).
* Uses normalised Levenshtein distance.
* @param {string} a
* @param {string} b
* @returns {number}
*/
function similarity(a, b) {
const sa = normalise(a);
const sb = normalise(b);
if (!sa && !sb) return 1;
if (!sa || !sb) return 0;
if (sa === sb) return 1;
const longer = sa.length >= sb.length ? sa : sb;
const shorter = sa.length >= sb.length ? sb : sa;
const dist = editDistance(longer, shorter);
return (longer.length - dist) / longer.length;
}
module.exports = { similarity, normalise, editDistance };
|