From e3020a278facbc22aeee07fe245b9b25882f42b5 Mon Sep 17 00:00:00 2001
From: jzitnik-dev
Date: Fri, 6 Feb 2026 20:27:21 +0100
Subject: [PATCH] feat: New v3 API

---
 scrape/parse.js    |   5 +-
 scrape/parse/v3.js | 403 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 407 insertions(+), 1 deletion(-)
 create mode 100644 scrape/parse/v3.js

diff --git a/scrape/parse.js b/scrape/parse.js
index a4bb495..424ff77 100644
--- a/scrape/parse.js
+++ b/scrape/parse.js
@@ -13,7 +13,10 @@
  */
 
 import parseV1V2 from "./parse/v1_v2.js";
+import parseV3 from "./parse/v3.js";
+
 
 export default async function parseThisShit(downloadedFilePath) {
-  await parseV1V2(downloadedFilePath)
+  await parseV1V2(downloadedFilePath);
+  await parseV3(downloadedFilePath);
 }
diff --git a/scrape/parse/v3.js b/scrape/parse/v3.js
new file mode 100644
index 0000000..83b630a
--- /dev/null
+++ b/scrape/parse/v3.js
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2025 Jakub Žitník
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+import fs from "fs";
+import parseAbsence from "../utils/parseAbsence.js";
+import parseTeachers from "../utils/parseTeachers.js";
+import ExcelJS from "exceljs";
+
+/** Number of lesson slots exported per class row (worksheet columns B..K). */
+const LESSON_SLOTS = 10;
+
+/**
+ * Parses the downloaded workbook and writes the v3 API payload to db/v3.json.
+ *
+ * Only worksheets whose name starts with a weekday and a date that is today
+ * or later are exported; the payload is keyed by date in YYYY-MM-DD form.
+ *
+ * @param {string} downloadedFilePath - Path of the .xlsx file to read.
+ * @returns {Promise<void>}
+ */
+export default async function parseV3(downloadedFilePath) {
+  const workbook = new ExcelJS.Workbook();
+  await workbook.xlsx.readFile(downloadedFilePath);
+
+  const teacherMap = await parseTeachers();
+
+  const upcoming = getUpcomingSheets(workbook);
+  const resolvedDays = groupSheetsByDate(upcoming);
+
+  const schedule = {};
+
+  for (const { dateKey, sheet } of resolvedDays) {
+    const { changes, absence, inWork, takesPlace, reservedRooms } =
+      extractDaySchedule(sheet, teacherMap);
+
+    schedule[dateKey] = {
+      info: { inWork },
+      changes,
+      absence,
+      takesPlace,
+      reservedRooms,
+    };
+  }
+
+  const data = {
+    status: { lastUpdated: formatNowTime() },
+    schedule,
+  };
+
+  fs.writeFileSync("db/v3.json", JSON.stringify(data, null, 2));
+}
+
+//
+// ────────────────────────────────────────────────────────────
+// SHEET FILTERING
+// ────────────────────────────────────────────────────────────
+//
+
+/**
+ * Collects the worksheets whose name starts with a weekday and a date
+ * (for example "po 9. 2. 2026") that is today or later.
+ *
+ * @param {object} workbook - Loaded ExcelJS workbook.
+ * @returns {{dateKey: string, sheet: object}[]} Sheets with YYYY-MM-DD keys.
+ */
+function getUpcomingSheets(workbook) {
+  const dateRegex =
+    /^(pondělí|úterý|středa|čtvrtek|pátek|po|út|ut|st|čt|ct|pa|pá)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i;
+
+  const today = new Date();
+  const todayMidnight = new Date(today.getFullYear(), today.getMonth(), today.getDate());
+
+  const result = [];
+
+  for (const sheet of workbook.worksheets) {
+    const match = sheet.name.match(dateRegex);
+    if (!match) continue;
+
+    const day = Number(match[2]);
+    const month = Number(match[3]) - 1;
+    const year = Number(match[4]);
+
+    const sheetDate = new Date(year, month, day);
+
+    // Date rolls impossible dates over (31. 2. becomes 3. 3.); skip such
+    // names instead of emitting a key that is not a real calendar date.
+    if (sheetDate.getMonth() !== month || sheetDate.getDate() !== day) continue;
+    if (sheetDate < todayMidnight) continue;
+
+    const dateKey = `${year}-${String(month + 1).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
+    result.push({ dateKey, sheet });
+  }
+
+  return result;
+}
+
+/**
+ * Picks one worksheet per date. When several sheets share a date, the first
+ * one whose state is not "hidden" wins; if all are hidden the first is used.
+ *
+ * @param {{dateKey: string, sheet: object}[]} items - Dated worksheets.
+ * @returns {{dateKey: string, sheet: object}[]} One entry per distinct date.
+ */
+function groupSheetsByDate(items) {
+  const sheetsByDate = new Map();
+
+  for (const { dateKey, sheet } of items) {
+    const sheets = sheetsByDate.get(dateKey) ?? [];
+    sheets.push(sheet);
+    sheetsByDate.set(dateKey, sheets);
+  }
+
+  return Array.from(sheetsByDate, ([dateKey, sheets]) => ({
+    dateKey,
+    sheet: sheets.find((s) => s.state !== "hidden") ?? sheets[0],
+  }));
+}
+
+//
+// ────────────────────────────────────────────────────────────
+// DAY PARSING
+// ────────────────────────────────────────────────────────────
+//
+
+/**
+ * Runs every per-day extractor on one worksheet.
+ *
+ * @param {object} sheet - ExcelJS worksheet for a single day.
+ * @param {object} teacherMap - Result of parseTeachers(), passed to parseAbsence.
+ * @returns {{changes: object, absence: Array, inWork: boolean, takesPlace: string, reservedRooms: Array}}
+ */
+function extractDaySchedule(sheet, teacherMap) {
+  return {
+    changes: extractClassChanges(sheet),
+    absence: extractAbsence(sheet, teacherMap),
+    inWork: isPripravaSheet(sheet.name),
+    takesPlace: extractTakesPlace(sheet),
+    reservedRooms: extractReservedRooms(sheet),
+  };
+}
+
+/**
+ * Tells whether a sheet name contains "priprava", ignoring case and
+ * diacritics (so "Příprava" matches too).
+ *
+ * @param {string} name - Worksheet name.
+ * @returns {boolean}
+ */
+function isPripravaSheet(name) {
+  return name
+    .toLowerCase()
+    .normalize("NFD")
+    .replace(/[\u0300-\u036f]/g, "")
+    .includes("priprava");
+}
+
+//
+// ────────────────────────────────────────────────────────────
+// CLASS CHANGES
+// ────────────────────────────────────────────────────────────
+//
+
+/**
+ * Builds the map of class name -> lesson slots for one day.
+ *
+ * A class row is any row whose column-A cell holds a string containing a
+ * class name (A, E or C, a digit 0-4 and an optional a-c suffix) followed
+ * by optional whitespace and a slash.
+ *
+ * @param {object} sheet - ExcelJS worksheet.
+ * @returns {Object<string, Array<string|null>>} Slots keyed by class name.
+ */
+function extractClassChanges(sheet) {
+  // The capture group is the class name that is actually followed by the
+  // slash, so the key always comes from the same match as the detection.
+  const classRegex = /([AEC][0-4][a-c]?)\s*\//;
+  const changes = {};
+
+  sheet.eachRow((row) => {
+    row.eachCell((cell) => {
+      const value = cell.value;
+      if (typeof value !== "string" || !isInColumnA(cell.address)) return;
+
+      const match = value.match(classRegex);
+      if (!match) return;
+
+      changes[match[1]] = buildLessonArray(row, cell.address);
+    });
+  });
+
+  return changes;
+}
+
+/**
+ * Converts one class row into exactly LESSON_SLOTS entries taken from
+ * worksheet columns B onwards.
+ *
+ * @param {object} row - ExcelJS row of the class.
+ * @param {string} ignoreAddress - Address of the class-name cell to skip.
+ * @returns {Array<string|null>} Lesson text per slot, null where there is none.
+ */
+function buildLessonArray(row, ignoreAddress) {
+  const lessons = [];
+
+  row.eachCell((cell) => {
+    if (cell.address === ignoreAddress) return;
+
+    const colIndex = letterToNumber(cell.address.replace(/[0-9]/g, ""));
+    lessons[colIndex] = parseLessonCell(cell);
+  });
+
+  // Index 0 is column A (the class name), so slot i lives at index i + 1.
+  // Building from a fixed length keeps the result at LESSON_SLOTS entries
+  // even when the row's last filled cell is left of column K.
+  return Array.from({ length: LESSON_SLOTS }, (_, slot) => lessons[slot + 1] ?? null);
+}
+
+/**
+ * Reads one lesson cell.
+ *
+ * @param {object} cell - ExcelJS cell.
+ * @returns {string|null} Trimmed cell text, with "\n(bude upřesněno)"
+ *   appended when the fill colour is FFFFFF00; null for empty cells, text
+ *   matching the "úklid ..." pattern, cells without a fill colour, or errors.
+ */
+function parseLessonCell(cell) {
+  try {
+    const text = (cell.text || "").trim();
+    const cleanupRegex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/;
+
+    if (!text || cleanupRegex.test(text) || !cell.fill?.fgColor) return null;
+
+    return cell.fill.fgColor.argb === "FFFFFF00" ? `${text}\n(bude upřesněno)` : text;
+  } catch {
+    // Kept as best effort: a cell whose text or fill cannot be read is
+    // reported as null instead of failing the whole export.
+    return null;
+  }
+}
+
+/**
+ * Collects the text of the merged cells in column B starting at row 4 and
+ * going down until the first cell that is not part of a merge.
+ *
+ * @param {object} sheet - ExcelJS worksheet.
+ * @returns {string} Trimmed texts joined with "\n"; "" when B4 is not merged.
+ */
+function extractTakesPlace(sheet) {
+  const lines = [];
+
+  for (let rowNumber = 4; ; rowNumber++) {
+    const cell = sheet.getCell(`B${rowNumber}`);
+    if (!cell.isMerged) break;
+
+    lines.push(cellToString(cell.master).trim());
+  }
+
+  return lines.join("\n");
+}
+
+/**
+ * Collects the cells of every row whose column-A cell is "rezervace".
+ *
+ * @param {object} sheet - ExcelJS worksheet.
+ * @returns {Array<string|null>} Cell contents in sheet order; null if blank.
+ */
+function extractReservedRooms(sheet) {
+  const result = [];
+
+  sheet.eachRow((row) => {
+    row.eachCell((labelCell) => {
+      const label = labelCell.value;
+      if (typeof label !== "string" || label.trim() !== "rezervace") return;
+      if (!isInColumnA(labelCell.address)) return;
+
+      row.eachCell((cell) => {
+        if (cell.address === labelCell.address) return;
+
+        const text = cellToString(cell);
+        result.push(text.trim().length === 0 ? null : text);
+      });
+    });
+  });
+
+  return result;
+}
+
+//
+// ────────────────────────────────────────────────────────────
+// ABSENCE
+// ────────────────────────────────────────────────────────────
+//
+
+/**
+ * Parses the row that contains the "absence" label.
+ *
+ * @param {object} sheet - ExcelJS worksheet.
+ * @param {object} teacherMap - Result of parseTeachers(), passed to parseAbsence.
+ * @returns {Array} Concatenated parseAbsence results; [] when no label exists.
+ */
+function extractAbsence(sheet, teacherMap) {
+  let absenceAddress = null;
+
+  // No early exit: when the label occurs more than once the last one wins.
+  sheet.eachRow((row) => {
+    row.eachCell((cell) => {
+      if ((cell.value || "").toString().trim().toLowerCase() === "absence") {
+        absenceAddress = cell.address;
+      }
+    });
+  });
+
+  if (!absenceAddress) return [];
+
+  const row = sheet.getRow(sheet.getCell(absenceAddress).row);
+  const results = [];
+  let visited = 0;
+
+  row.eachCell((cell) => {
+    // At most ten cells of the row (besides the label) are considered.
+    if (cell.address === absenceAddress || visited >= 10) return;
+    visited++;
+
+    const value = (cell.value || "").toString().trim();
+    if (!value) return;
+
+    results.push(...parseAbsence(value, teacherMap));
+  });
+
+  return results;
+}
+
+//
+// ────────────────────────────────────────────────────────────
+// UTILS
+// ────────────────────────────────────────────────────────────
+//
+
+/**
+ * Tells whether a cell address such as "A12" lies in column A. A plain
+ * startsWith("A") check would also accept columns AA, AB, ...
+ *
+ * @param {string} address - Cell address.
+ * @returns {boolean}
+ */
+function isInColumnA(address) {
+  return /^A\d+$/.test(address);
+}
+
+/**
+ * Returns a cell's content as a string without assuming the value is one.
+ * Non-string values (numbers, rich text, ...) go through the cell's text.
+ *
+ * @param {object} cell - ExcelJS cell.
+ * @returns {string} The content, or "" for an empty cell.
+ */
+function cellToString(cell) {
+  const { value } = cell;
+  if (value === null || value === undefined) return "";
+
+  return typeof value === "string" ? value : String(cell.text ?? "");
+}
+
+/**
+ * Converts column letters to a zero-based column index
+ * ("A" -> 0, "Z" -> 25, "AA" -> 26).
+ *
+ * @param {string} letter - Column letters, case-insensitive.
+ * @returns {number} Zero-based column index.
+ */
+function letterToNumber(letter) {
+  let index = 0;
+
+  for (const char of letter.toUpperCase()) {
+    index = index * 26 + (char.charCodeAt(0) - 64);
+  }
+
+  return index - 1;
+}
+
+/**
+ * Formats the current local time as HH:MM.
+ *
+ * @returns {string}
+ */
+function formatNowTime() {
+  const now = new Date();
+  const hours = String(now.getHours()).padStart(2, "0");
+  const minutes = String(now.getMinutes()).padStart(2, "0");
+
+  return `${hours}:${minutes}`;
+}