diff --git a/scrape/parse.js b/scrape/parse.js index 83fa1e4..01912ca 100644 --- a/scrape/parse.js +++ b/scrape/parse.js @@ -159,7 +159,7 @@ export default async function parseThisShit(downloadedFilePath) { } const data = parseAbsence(value, teacherMap); - final[finalIndex]["ABSENCE"].push(data); + final[finalIndex]["ABSENCE"].push(...data); } } @@ -202,7 +202,7 @@ export default async function parseThisShit(downloadedFilePath) { } } - fs.writeFileSync("db/current.json", JSON.stringify(data)); + fs.writeFileSync("db/current.json", JSON.stringify(data, null, 2)); } -// parseThisShit("downloads/table.xlsx") +parseThisShit("downloads/table.xlsx") diff --git a/scrape/utils/parseAbsence.js b/scrape/utils/parseAbsence.js index 5c52514..3c833cb 100644 --- a/scrape/utils/parseAbsence.js +++ b/scrape/utils/parseAbsence.js @@ -1,106 +1,137 @@ -export default function parseAbsence(str, teacherMap) { - const LAST_HOUR = 10; +const LAST_HOUR = 10; - const s = (str ?? "").trim().replace(/\s+/g, " "); - const result = { - teacher: null, // String or null - teacherCode: null, // String or null - type: null, // "wholeDay" | "range" | "single" | "invalid" - hours: null // { from, to } or number - }; +// ------------------------------- +// Helpers +// ------------------------------- +export const cleanInput = (input) => (input ?? "").trim().replace(/\s+/g, " "); +export const isTeacherToken = (t) => /^[A-Za-z]+$/.test(t); - if (!s) { - result.type = "invalid"; - result.original = s; - return result; +export const parseSpec = (spec) => { + if (!spec) return null; + let m; + if ((m = spec.match(/^(\d+)\+$/))) { + const from = Number(m[1]); + return from >= 1 && from <= LAST_HOUR + ? { kind: "range", value: { from, to: LAST_HOUR } } + : null; + } + if ((m = spec.match(/^(\d+)-(\d+)$/))) { + const from = Number(m[1]); + const to = Number(m[2]); + return from >= 1 && to >= from && to <= LAST_HOUR + ? { kind: "range", value: { from, to } } + : null; + } + if ((m = spec.match(/^(\d+)$/))) { + const hour = Number(m[1]); + return hour >= 1 && hour <= LAST_HOUR + ? { kind: "single", value: hour } + : null; + } + return null; +}; + +export const resolveTeacher = (teacherCode, teacherMap = {}) => ({ + code: teacherCode, + name: teacherMap?.[teacherCode.toLowerCase()] ?? null, +}); + +const makeResult = (teacherCode, spec, teacherMap) => { + const { name } = resolveTeacher(teacherCode, teacherMap); + const type = spec ? (spec.kind === "range" ? "range" : "single") : "wholeDay"; + const hours = spec ? spec.value : null; + return { teacher: name, teacherCode, type, hours }; +}; + +// ------------------------------- +// Teacher list processing (modular) +// ------------------------------- +const processTeacherList = (teacherListStr, spec, teacherMap) => { + let results = []; + + if (teacherListStr.includes(",")) { + // Comma = spec applies to all + const teachers = teacherListStr.split(/\s*,\s*/).filter(Boolean); + results = teachers.map((t) => makeResult(t, spec, teacherMap)); + } else if (teacherListStr.includes(";")) { + // Semicolon = spec applies only to last, others = whole day + const teachers = teacherListStr.split(/\s*;\s*/).filter(Boolean); + teachers.forEach((t, i) => { + const resSpec = i === teachers.length - 1 ? spec : null; + results.push(makeResult(t, resSpec, teacherMap)); + }); + } else { + // Single teacher + results.push(makeResult(teacherListStr, spec, teacherMap)); } - const isTeacher = (t) => /^[A-Za-z]+$/.test(t); + return results; +}; - const parseSpec = (spec) => { - let m; - // "5+" -> range 5..LAST_HOUR - if ((m = spec.match(/^(\d+)\+$/))) { - const from = parseInt(m[1], 10); - if (from >= 1 && from <= LAST_HOUR) { - return { kind: "range", value: { from, to: LAST_HOUR } }; - } - return null; - } - // "5-7" -> range 5..7 - if ((m = spec.match(/^(\d+)-(\d+)$/))) { - const from = parseInt(m[1], 10); - const to = parseInt(m[2], 10); - if (from >= 1 && to >= 1 && from <= to && to <= LAST_HOUR) { - return { kind: "range", value: { from, to } }; - } - return null; - } - // "5" -> single 5 - if ((m = spec.match(/^(\d+)$/))) { - const hour = parseInt(m[1], 10); - if (hour >= 1 && hour <= LAST_HOUR) { - return { kind: "single", value: hour }; - } - return null; - } - return null; - }; +// ------------------------------- +// Main parser +// ------------------------------- +export default function parseAbsence(input, teacherMap = {}) { + const s = cleanInput(input); + if (!s) return []; - const parts = s.split(" "); - if (parts.length === 1) { - const t = parts[0]; - if (isTeacher(t)) { - result.teacherCode = t; - result.teacher = teacherMap?.[t.toLowerCase()] ?? null; - result.type = "wholeDay"; - return result; - } - result.type = "invalid"; - result.original = s; - return result; + const results = []; + const consumed = []; + const markConsumed = (start, end) => consumed.push([start, end]); + const isConsumed = (i) => consumed.some(([a, b]) => i >= a && i < b); + + // Regex: teacher-list [+ optional spec] + const teacherListThenSpecRe = /([A-Za-z]+(?:[,;]\s?[A-Za-z]+)*)(?:\s*)?(\d+(?:\+|-\d+)?)/g; + let m; + while ((m = teacherListThenSpecRe.exec(s)) !== null) { + const matchStart = m.index; + const matchEnd = teacherListThenSpecRe.lastIndex; + if (isConsumed(matchStart)) continue; + + const [_, teacherListStr, specStr] = m; + const spec = parseSpec(specStr); + if (!spec) continue; + + results.push(...processTeacherList(teacherListStr, spec, teacherMap)); + markConsumed(matchStart, matchEnd); } - if (parts.length === 2) { - const [a, b] = parts; + // Standalone teachers → whole day + const teacherOnlyRe = /([A-Za-z]+(?:[,;]\s?[A-Za-z]+)*)/g; + while ((m = teacherOnlyRe.exec(s)) !== null) { + const matchStart = m.index; + const matchEnd = teacherOnlyRe.lastIndex; + if (isConsumed(matchStart)) continue; - // Teacher first: "Nm 5", "Nm 5-7", "Nm 5+" - if (isTeacher(a)) { - const spec = parseSpec(b); - if (!spec) { - result.type = "invalid"; - result.original = s; - return result; - } - result.teacherCode = a; - result.teacher = teacherMap?.[a.toLowerCase()] ?? null; - result.type = spec.kind === "range" ? "range" : "single"; - result.hours = spec.value; - return result; - } - - // Teacher last: "5 Nm", "5-7 Nm", "5+ Nm" - if (isTeacher(b)) { - const spec = parseSpec(a); - if (!spec) { - result.type = "invalid"; - result.original = s; - return result; - } - result.teacherCode = b; - result.teacher = teacherMap?.[b.toLowerCase()] ?? null; - result.type = spec.kind === "range" ? "range" : "single"; - result.hours = spec.value; - return result; - } - - result.type = "invalid"; - result.original = s; - return result; + const tList = m[1].split(/[,;]\s*/).filter(Boolean); + tList.forEach((t) => { + if (isTeacherToken(t)) results.push(makeResult(t, null, teacherMap)); + else + results.push({ + type: "invalid", + teacher: null, + teacherCode: null, + hours: null, + original: t, + }); + }); + markConsumed(matchStart, matchEnd); } - // Anything else is invalid - result.type = "invalid"; - result.original = s; - return result; + // Bare specs without teacher → invalid + const specOnlyRe = /\b(\d+(?:\+|-\d+)?)\b/g; + while ((m = specOnlyRe.exec(s)) !== null) { + const matchStart = m.index; + if (isConsumed(matchStart)) continue; + + results.push({ + type: "invalid", + teacher: null, + teacherCode: null, + hours: null, + original: m[1], + }); + } + + return results; }