/** Highest hour number accepted; an open-ended spec such as "5+" expands to 5..LAST_HOUR. */
const LAST_HOUR = 10;

/** A teacher code is one or more ASCII letters. */
const TEACHER_CODE = /^[A-Za-z]+$/;
const OPEN_RANGE = /^(\d+)\+$/;
const CLOSED_RANGE = /^(\d+)-(\d+)$/;
const SINGLE_HOUR = /^(\d+)$/;

/** True when `hour` lies within 1..LAST_HOUR. */
const isValidHour = (hour) => hour >= 1 && hour <= LAST_HOUR;

/**
 * Parses the hour part of an absence entry.
 *
 * @param {string} spec - "5", "5-7" or "5+".
 * @returns {{ type: "single" | "range", hours: number | { from: number, to: number } } | null}
 *   The parsed hours, or null when the spec is malformed or out of range.
 */
function parseHourSpec(spec) {
  let match = OPEN_RANGE.exec(spec);
  if (match) {
    const from = Number.parseInt(match[1], 10);
    return isValidHour(from)
      ? { type: "range", hours: { from, to: LAST_HOUR } }
      : null;
  }

  match = CLOSED_RANGE.exec(spec);
  if (match) {
    const from = Number.parseInt(match[1], 10);
    const to = Number.parseInt(match[2], 10);
    return isValidHour(from) && isValidHour(to) && from <= to
      ? { type: "range", hours: { from, to } }
      : null;
  }

  match = SINGLE_HOUR.exec(spec);
  if (match) {
    const hour = Number.parseInt(match[1], 10);
    return isValidHour(hour) ? { type: "single", hours: hour } : null;
  }

  return null;
}

/**
 * Resolves a teacher code through `teacherMap` using the lower-cased code as key.
 * Only own properties are consulted, so codes such as "constructor" cannot
 * resolve to members inherited from Object.prototype.
 */
function lookupTeacher(teacherMap, code) {
  if (teacherMap == null) {
    return null;
  }
  const key = code.toLowerCase();
  return Object.prototype.hasOwnProperty.call(teacherMap, key)
    ? teacherMap[key] ?? null
    : null;
}

/**
 * Parses one absence entry consisting of a teacher code and an optional hour spec.
 *
 * Accepted forms (the teacher code may come before or after the hours):
 *   "Nm"                  -> wholeDay
 *   "Nm 5"   / "5 Nm"     -> single hour 5
 *   "Nm 5-7" / "5 - 7 Nm" -> range 5..7 (spaces around the dash are tolerated)
 *   "Nm 5+"  / "5+ Nm"    -> range 5..LAST_HOUR
 *
 * @param {string} str - Raw entry text; surrounding and repeated whitespace is ignored.
 * @param {Object<string, *>} [teacherMap] - Lookup keyed by lower-cased teacher code.
 * @returns {{
 *   teacher: *,
 *   teacherCode: string | null,
 *   type: "wholeDay" | "range" | "single" | "invalid",
 *   hours: number | { from: number, to: number } | null,
 *   original?: string
 * }} The parsed entry. When `type` is "invalid", `original` holds the
 *   whitespace-normalized input and the remaining fields stay null.
 */
export default function parseAbsence(str, teacherMap) {
  // Non-string input is coerced instead of throwing on `.trim()`.
  const raw = typeof str === "string" ? str : String(str ?? "");
  const s = raw.trim().replace(/\s+/g, " ");

  const result = {
    teacher: null, // mapped teacher value or null
    teacherCode: null, // teacher code as written in the input, or null
    type: null, // "wholeDay" | "range" | "single" | "invalid"
    hours: null, // { from, to } for ranges, number for single hours
  };

  const invalid = () => {
    result.type = "invalid";
    result.original = s;
    return result;
  };

  if (!s) {
    return invalid();
  }

  // Collapse "5 - 7" into "5-7" so a range written with spaces around the
  // dash stays a single token when splitting on spaces.
  const parts = s.replace(/(\d) ?- ?(?=\d)/g, "$1-").split(" ");

  if (parts.length === 1) {
    const [code] = parts;
    if (!TEACHER_CODE.test(code)) {
      return invalid();
    }
    result.teacherCode = code;
    result.teacher = lookupTeacher(teacherMap, code);
    result.type = "wholeDay";
    return result;
  }

  if (parts.length !== 2) {
    return invalid();
  }

  const [first, second] = parts;
  let code;
  let spec;
  if (TEACHER_CODE.test(first)) {
    // Teacher first: "Nm 5", "Nm 5-7", "Nm 5+"
    code = first;
    spec = second;
  } else if (TEACHER_CODE.test(second)) {
    // Teacher last: "5 Nm", "5-7 Nm", "5+ Nm"
    code = second;
    spec = first;
  } else {
    return invalid();
  }

  const parsed = parseHourSpec(spec);
  if (!parsed) {
    return invalid();
  }

  result.teacherCode = code;
  result.teacher = lookupTeacher(teacherMap, code);
  result.type = parsed.type;
  result.hours = parsed.hours;
  return result;
}