Skip to content

Commit 5f23f81

Browse files
committed
refactor: extract pdf coordinate tolerances to named constants and add jsdoc
- extract the magic number 2 into the PDF_LINE_Y_TOLERANCE and PDF_NODE_GAP_THRESHOLD constants, with explanatory comments
- add jsdoc to buildLinesFromPDFNodes explaining the spatial reconstruction algorithm

WCN-19
1 parent 45ec5bf commit 5f23f81

1 file changed

Lines changed: 18 additions & 3 deletions

File tree

modules/key-card/src/parseKeycard.ts

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ const sectionHeaderRegex = /^([A-D])\s*[:.)-]\s*(.+?)\s*$/i;
1515
const dataLineRegex = /^data\s*:\s*(.*)$/i;
1616
const faqHeaderRegex = /^BitGo\s+KeyCard\s+FAQ$/i;
1717

18+
// PDF coordinate tolerance in points. Nodes within this distance on the Y-axis
19+
// are treated as belonging to the same line; nodes further apart are separate lines.
20+
const PDF_LINE_Y_TOLERANCE = 2;
21+
// Horizontal gap in points above which a space is inserted between adjacent nodes.
22+
const PDF_NODE_GAP_THRESHOLD = 2;
23+
1824
function sanitizeText(input: string): string {
1925
return input.replace(/\s+/g, ' ').trim();
2026
}
@@ -43,13 +49,22 @@ function isEncryptedWalletPasswordSectionTitle(title: string): boolean {
4349
return title.toLowerCase().includes('encrypted wallet password');
4450
}
4551

52+
/**
53+
* Reconstructs logical text lines from an unordered set of PDF text nodes.
54+
*
55+
* PDF text extraction returns individual positioned fragments. This function
56+
* sorts them by page then Y-coordinate (top-to-bottom), groups fragments
57+
* within PDF_LINE_Y_TOLERANCE points of each other onto the same line, and
58+
* inserts a space between fragments that are separated by more than
59+
* PDF_NODE_GAP_THRESHOLD points horizontally.
60+
*/
4661
export function buildLinesFromPDFNodes(nodes: PDFTextNode[]): string[] {
4762
const sortedNodes = [...nodes].sort((a, b) => {
4863
if (a.page !== b.page) {
4964
return a.page - b.page;
5065
}
5166
const yDiff = Math.abs(a.y - b.y);
52-
if (yDiff > 2) {
67+
if (yDiff > PDF_LINE_Y_TOLERANCE) {
5368
return b.y - a.y;
5469
}
5570
return a.x - b.x;
@@ -74,7 +89,7 @@ export function buildLinesFromPDFNodes(nodes: PDFTextNode[]): string[] {
7489
continue;
7590
}
7691

77-
if (previousRightEdge !== null && node.x - previousRightEdge > 2) {
92+
if (previousRightEdge !== null && node.x - previousRightEdge > PDF_NODE_GAP_THRESHOLD) {
7893
line += ' ';
7994
}
8095
line += piece;
@@ -89,7 +104,7 @@ export function buildLinesFromPDFNodes(nodes: PDFTextNode[]): string[] {
89104

90105
for (const node of sortedNodes) {
91106
const pageChanged = node.page !== currentPage;
92-
const lineChanged = Number.isNaN(currentY) || Math.abs(node.y - currentY) > 2;
107+
const lineChanged = Number.isNaN(currentY) || Math.abs(node.y - currentY) > PDF_LINE_Y_TOLERANCE;
93108
if (pageChanged || lineChanged) {
94109
flushLine();
95110
currentLineNodes = [node];

0 commit comments

Comments
 (0)