diff --git a/scrape/utils/parseAbsence.js b/scrape/utils/parseAbsence.js index 3cc80de..8a9bd25 100644 --- a/scrape/utils/parseAbsence.js +++ b/scrape/utils/parseAbsence.js @@ -2,37 +2,79 @@ export default function parseAbsence(str, teacherMap) { str = str.trim().replace(/\s+/g, " "); const result = { - teacher: null, // String or null - teacherCode: null, // String or null - type: null, // "wholeDay" | "range" | "single" | "invalid" - hours: null // { from: number, to: number } or number + teacher: null, + teacherCode: null, + type: null, // "wholeDay" | "range" | "single" | "invalid" + hours: null }; - // Regex patterns (with flexible spaces) - const wholeDayPattern = /^([A-Za-z]+)$/; - const rangePattern = /^(\d+)\s*-\s*(\d+)\s+([A-Za-z]+)$/; - const singleHourPattern = /^(\d+)\s+([A-Za-z]+)$/; + // Patterns (teacher can be before or after the hours) + const wholeDayPattern = /^([A-Za-z]+)$/; + const rangePattern = /^(\d+)\s*-\s*(\d+)\s+([A-Za-z]+)$|^([A-Za-z]+)\s+(\d+)\s*-\s*(\d+)$/; + const singleHourPattern = /^(\d+)\s+([A-Za-z]+)$|^([A-Za-z]+)\s+(\d+)$/; + const plusPattern = /^(\d+)\+\s+([A-Za-z]+)$|^([A-Za-z]+)\s+(\d+)\+$/; if (rangePattern.test(str)) { - const [, from, to, teacherCode] = str.match(rangePattern); - result.teacher = teacherMap[teacherCode.toLowerCase()]; - result.teacherCode = teacherCode; - result.type = "range"; - result.hours = { from: parseInt(from), to: parseInt(to) }; - } + const match = str.match(rangePattern); + // Case 1: hours first, then teacher + if (match[1] && match[2] && match[3]) { + const [, from, to, teacherCode] = match; + result.teacherCode = teacherCode; + result.teacher = teacherMap[teacherCode.toLowerCase()]; + result.type = "range"; + result.hours = { from: parseInt(from), to: parseInt(to) }; + } + // Case 2: teacher first, then hours + else { + const [, , , teacherCode, from, to] = match; + result.teacherCode = teacherCode; + result.teacher = teacherMap[teacherCode.toLowerCase()]; + result.type = "range"; + result.hours = { from: parseInt(from), to: parseInt(to) }; + } + } + else if (plusPattern.test(str)) { + const match = str.match(plusPattern); + if (match[1] && match[2]) { + // hours first + const [, from, teacherCode] = match; + result.teacherCode = teacherCode; + result.teacher = teacherMap[teacherCode.toLowerCase()]; + result.type = "range"; + result.hours = { from: parseInt(from), to: 10 }; + } else { + // teacher first + const [, , teacherCode, from] = match; + result.teacherCode = teacherCode; + result.teacher = teacherMap[teacherCode.toLowerCase()]; + result.type = "range"; + result.hours = { from: parseInt(from), to: 10 }; + } + } else if (singleHourPattern.test(str)) { - const [, hour, teacherCode] = str.match(singleHourPattern); - result.teacher = teacherMap[teacherCode.toLowerCase()]; - result.teacherCode = teacherCode; - result.type = "single"; - result.hours = parseInt(hour); - } + const match = str.match(singleHourPattern); + if (match[1] && match[2]) { + // hours first + const [, hour, teacherCode] = match; + result.teacherCode = teacherCode; + result.teacher = teacherMap[teacherCode.toLowerCase()]; + result.type = "single"; + result.hours = parseInt(hour); + } else { + // teacher first + const [, , teacherCode, hour] = match; + result.teacherCode = teacherCode; + result.teacher = teacherMap[teacherCode.toLowerCase()]; + result.type = "single"; + result.hours = parseInt(hour); + } + } else if (wholeDayPattern.test(str)) { const [, teacherCode] = str.match(wholeDayPattern); result.teacher = teacherMap[teacherCode.toLowerCase()]; result.teacherCode = teacherCode; result.type = "wholeDay"; - } + } else { result.type = "invalid"; result.original = str;