"""Read the machine-readable zone (MRZ) of a passport image and print JSON.

Usage::

    python mrz_reader.py <image_path>

Exactly one JSON object is written to stdout: either the extracted passport
fields or ``{"error": "..."}`` when nothing could be read.

Environment:
    TESSERACT_CMD: optional path to the ``tesseract`` executable.
    TESSDATA_PREFIX: honoured when already set; otherwise defaulted on
        Windows if the standard Tesseract install directory exists.
"""
import datetime
import json
import os
import sys

# Standard Tesseract install directory on Windows; only used when it exists.
_WINDOWS_TESSERACT_DIR = r'C:\Program Files\Tesseract-OCR'

_ASCII_DIGITS = '0123456789'

# Digits that OCR produces in place of letters in alphabetic MRZ fields.
_DIGIT_TO_LETTER = str.maketrans(
    {'0': 'O', '1': 'I', '3': 'A', '8': 'B', '6': 'G', '5': 'S'}
)

# Letters that OCR produces in place of digits in numeric MRZ fields.
_LETTER_TO_DIGIT = str.maketrans(
    {'F': '5', 'S': '5', 'O': '0', 'I': '1', 'B': '8'}
)

# Exact surname misreads known to be wrong (H read in place of M).
# A blanket H -> M rewrite is NOT safe: it turns JOHNSON into JOMNSON and
# SHAH into SMAH, so only whole-name matches listed here are corrected.
_SURNAME_CORRECTIONS = {'SLIHANI': 'SLIMANI'}


def fix_nationality(raw):
    """Return the 3-character nationality code with digit misreads fixed.

    The input is upper-cased, truncated to three characters, and digits
    that OCR commonly confuses with letters are mapped back to letters.
    ``None`` or an empty value yields ``''``.
    """
    return (raw or '').upper()[:3].translate(_DIGIT_TO_LETTER)


def fix_last_name(raw):
    """Return *raw* with known whole-surname OCR misreads corrected.

    Each space-separated part of the surname is looked up in
    ``_SURNAME_CORRECTIONS``; parts that are not listed are left untouched,
    so legitimate names containing ``H`` are never altered. ``None`` or an
    empty value yields ``''``.
    """
    if not raw:
        return ''
    # split(' ') / join(' ') keeps the original spacing byte-for-byte.
    return ' '.join(
        _SURNAME_CORRECTIONS.get(part, part) for part in raw.split(' ')
    )


def _normalize_numeric(raw):
    """Upper-case *raw* and map letter misreads to the digits they resemble."""
    return (raw or '').upper().translate(_LETTER_TO_DIGIT)


def _bounded_int(text, low, high):
    """Return ``int(text)`` if it is two ASCII digits in [low, high], else 1."""
    if len(text) == 2 and all(c in _ASCII_DIGITS for c in text):
        value = int(text)
        if low <= value <= high:
            return value
    return 1


def _safe_date(year, mm, dd):
    """Build a real calendar date, falling back to 1 for unusable parts.

    Out-of-range or unreadable month/day values become 1. A day that is in
    range but does not exist in that month (e.g. 30 February) also becomes
    1, so the result is always a date a database will accept.
    """
    month = _bounded_int(mm, 1, 12)
    day = _bounded_int(dd, 1, 31)
    try:
        return datetime.date(year, month, day)
    except ValueError:
        return datetime.date(year, month, 1)


def parse_dob(raw, today=None):
    """Convert an MRZ ``YYMMDD`` date of birth to ``YYYY-MM-DD``.

    Args:
        raw: The six-character MRZ date field. Month and day may be filler
            characters when unknown; they then default to ``01``.
        today: Reference date used to pick the century (defaults to the
            current date). Exposed so callers and tests can be deterministic.

    Returns:
        The ISO date string, or ``None`` when the two year characters are
        not both digits (after OCR letter-to-digit correction).
    """
    cleaned = _normalize_numeric(raw)
    yy = cleaned[0:2]
    if len(yy) != 2 or not all(c in _ASCII_DIGITS for c in yy):
        return None
    if today is None:
        today = datetime.date.today()
    born = _safe_date(2000 + int(yy), cleaned[2:4], cleaned[4:6])
    # A date of birth cannot lie in the future, so it belongs to the 1900s.
    if born > today:
        born = _safe_date(1900 + int(yy), cleaned[2:4], cleaned[4:6])
    return born.isoformat()


def parse_expiry(raw):
    """Convert an MRZ ``YYMMDD`` expiry date to ``YYYY-MM-DD`` (20YY).

    Letter misreads are mapped to digits and every non-digit is dropped
    before the first six digits are interpreted.

    Returns:
        The ISO date string, or ``None`` when fewer than six digits remain.
    """
    digits = ''.join(c for c in _normalize_numeric(raw) if c in _ASCII_DIGITS)
    if len(digits) < 6:
        return None
    year = 2000 + int(digits[0:2])
    return _safe_date(year, digits[2:4], digits[4:6]).isoformat()


def _configure_tesseract():
    """Point pytesseract at the Tesseract binary without assuming Windows."""
    import pytesseract

    cmd = os.environ.get('TESSERACT_CMD')
    if not cmd and os.name == 'nt':
        candidate = os.path.join(_WINDOWS_TESSERACT_DIR, 'tesseract.exe')
        if os.path.isfile(candidate):
            cmd = candidate
            os.environ.setdefault(
                'TESSDATA_PREFIX',
                os.path.join(_WINDOWS_TESSERACT_DIR, 'tessdata'),
            )
    # When nothing is configured, pytesseract's own default is left in place.
    if cmd:
        pytesseract.pytesseract.tesseract_cmd = cmd


def _text(fields, key):
    """Return ``fields[key]`` stripped, treating missing/None as ''."""
    return (fields.get(key) or '').strip()


def main(argv=None):
    """Run MRZ extraction for the image path in *argv* and print JSON.

    Returns:
        Process exit code: 0 when a JSON result (including the "No MRZ
        detected" error object) was printed, 2 on incorrect usage.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print(json.dumps({"error": "Usage: mrz_reader.py <image_path>"}))
        return 2

    # Imported here so the pure parsing helpers above remain importable
    # on machines that do not have the OCR stack installed.
    from passporteye import read_mrz

    _configure_tesseract()
    mrz = read_mrz(argv[1], extra_cmdline_params='--oem 0')

    if mrz is None:
        print(json.dumps({"error": "No MRZ detected"}))
        return 0

    d = mrz.to_dict()
    print(json.dumps({
        "firstName": _text(d, "names"),
        "lastName": fix_last_name(_text(d, "surname")),
        "passportNumber": _text(d, "number"),
        "nationality": fix_nationality(d.get("nationality", "")),
        "dateOfBirth": parse_dob(d.get("date_of_birth", "")),
        "passportExpiry": parse_expiry(d.get("expiration_date", "")),
    }))
    return 0


if __name__ == "__main__":
    sys.exit(main())
a/package-lock.json b/package-lock.json index 04f4572..2216b92 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,6 +15,7 @@ "express": "^5.2.1", "joi": "^18.1.2", "jsonwebtoken": "^9.0.3", + "multer": "^2.1.1", "pdfkit": "^0.18.0", "pg": "^8.20.0", "qrcode": "^1.5.4", @@ -162,6 +163,12 @@ "node": ">= 8" } }, + "node_modules/append-field": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", + "integrity": "sha512-klpgFSWLW1ZEs8svjfb7g4qWY0YS5imI82dTg+QahUvJ8YqAY0P10Uk8tTyh9ZGuYEZEMaeJYCF5BFuX552hsw==", + "license": "MIT" + }, "node_modules/balanced-match": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", @@ -288,6 +295,23 @@ "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", "license": "BSD-3-Clause" }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "license": "MIT" + }, + "node_modules/busboy": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", + "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "dependencies": { + "streamsearch": "^1.1.0" + }, + "engines": { + "node": ">=10.16.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -398,6 +422,21 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "license": "MIT" }, + "node_modules/concat-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz", + "integrity": 
"sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==", + "engines": [ + "node >= 6.0" + ], + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.0.2", + "typedarray": "^0.0.6" + } + }, "node_modules/content-disposition": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.1.0.tgz", @@ -1199,6 +1238,68 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/multer": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/multer/-/multer-2.1.1.tgz", + "integrity": "sha512-mo+QTzKlx8R7E5ylSXxWzGoXoZbOsRMpyitcht8By2KHvMbf3tjwosZ/Mu/XYU6UuJ3VZnODIrak5ZrPiPyB6A==", + "license": "MIT", + "dependencies": { + "append-field": "^1.0.0", + "busboy": "^1.6.0", + "concat-stream": "^2.0.0", + "type-is": "^1.6.18" + }, + "engines": { + "node": ">= 10.16.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/multer/node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/multer/node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/multer/node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": 
"sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/multer/node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "license": "MIT", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/negotiator": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", @@ -1607,6 +1708,20 @@ "node": ">= 0.10" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/readdirp": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", @@ -1855,6 +1970,23 @@ "node": ">= 0.8" } }, + "node_modules/streamsearch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", + "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, 
"node_modules/string-width": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", @@ -1952,6 +2084,12 @@ "node": ">= 0.6" } }, + "node_modules/typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA==", + "license": "MIT" + }, "node_modules/undefsafe": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/undefsafe/-/undefsafe-2.0.5.tgz", @@ -1994,6 +2132,12 @@ "node": ">= 0.8" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/uuid": { "version": "14.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-14.0.0.tgz", diff --git a/package.json b/package.json index 709251b..8ef9e0e 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "express": "^5.2.1", "joi": "^18.1.2", "jsonwebtoken": "^9.0.3", + "multer": "^2.1.1", "pdfkit": "^0.18.0", "pg": "^8.20.0", "qrcode": "^1.5.4", diff --git a/routes/checkin.js b/routes/checkin.js index 569c781..eb05816 100644 --- a/routes/checkin.js +++ b/routes/checkin.js @@ -4,6 +4,23 @@ const db = require("../src/config/db"); const router = express.Router(); +const multer = require("multer"); +const fs = require("fs"); +const { execFile } = require("child_process"); + +const upload = multer({ + dest: "tmp/", + limits: { fileSize: 10 * 1024 * 1024 }, + fileFilter: (req, file, cb) => { + const allowed = ["image/jpeg", "image/png", "image/jpg"]; + if (allowed.includes(file.mimetype)) { + cb(null, true); + } else { + cb(new Error("Only JPEG and PNG images are allowed")); + } + }, +}); + router.get("/booking/lookup", async (req, res, next) => { try { const { 
/**
 * POST /checkin/:id/passport/ocr
 *
 * Accepts a single uploaded image in the "passport" form field, runs the
 * mrz_reader.py script on it, and stores the extracted fields on the
 * CHECKIN row with "isIdentityVerified" reset to FALSE.
 *
 * Responses:
 *   400 - no image was uploaded
 *   404 - no CHECKIN row with the given id
 *   422 - the script reported an error or returned no passport number
 *   200 - { checkInId, extracted }
 * Any other failure is forwarded to next(err).
 *
 * The interpreter defaults to "python" and can be overridden with the
 * PYTHON_BIN environment variable.
 */
router.post(
  "/checkin/:id/passport/ocr",
  upload.single("passport"),
  async (req, res, next) => {
    const filePath = req.file?.path;
    try {
      const checkInId = req.params.id;
      if (!req.file) {
        return res.status(400).json({ message: "Passport image is required" });
      }

      const checkInResult = await db.query(
        "SELECT id FROM CHECKIN WHERE id = $1",
        [checkInId]
      );
      if (checkInResult.rows.length === 0) {
        return res.status(404).json({ message: "Check-in not found" });
      }

      // Resolve paths explicitly so the call does not depend on the
      // process's current working directory.
      const path = require("path");
      const scriptPath = path.join(__dirname, "..", "mrz_reader.py");
      const pythonBin = process.env.PYTHON_BIN || "python";

      const parsed = await new Promise((resolve, reject) => {
        execFile(
          pythonBin,
          [scriptPath, path.resolve(filePath)],
          // Kill the child if OCR hangs so the request cannot stall forever.
          { timeout: 60000 },
          (err, stdout) => {
            if (err) return reject(err);
            try {
              resolve(JSON.parse(stdout));
            } catch (e) {
              // Raw stdout is deliberately left out: it may hold passport data.
              reject(new Error("Failed to parse PassportEye output"));
            }
          }
        );
      });

      // Log the outcome only; the extracted fields are personal data.
      const extractedOk = Boolean(
        parsed && !parsed.error && parsed.passportNumber
      );
      console.log(
        "[OCR] PassportEye finished for check-in %s (extracted: %s)",
        checkInId,
        extractedOk
      );

      if (!extractedOk) {
        return res.status(422).json({
          message: "Could not extract passport data. Please fill in manually.",
          extracted: null,
        });
      }

      await db.query(
        `UPDATE CHECKIN
            SET "firstName" = $1,
                "lastName" = $2,
                "passportNumber" = $3,
                "passportExpiry" = $4,
                nationality = $5,
                "dateOfBirth" = $6,
                "isIdentityVerified" = FALSE
          WHERE id = $7`,
        [
          parsed.firstName,
          parsed.lastName,
          parsed.passportNumber,
          parsed.passportExpiry,
          parsed.nationality,
          parsed.dateOfBirth,
          checkInId,
        ]
      );
      return res.status(200).json({ checkInId, extracted: parsed });
    } catch (err) {
      return next(err);
    } finally {
      if (filePath) {
        // Best-effort, non-blocking cleanup of the temporary upload. A
        // failure here must not throw after the response has been sent.
        await fs.promises.unlink(filePath).catch((cleanupErr) => {
          if (cleanupErr.code !== "ENOENT") {
            console.warn(
              "[OCR] Failed to remove temporary upload:",
              cleanupErr.message
            );
          }
        });
      }
    }
  }
);