diff --git a/scrape/api/announcements.ts b/scrape/api/announcements.ts index 5e57001..fe6adce 100644 --- a/scrape/api/announcements.ts +++ b/scrape/api/announcements.ts @@ -49,7 +49,7 @@ export default async function getAnnouncements(dates: string[]): Promise Promise +} + +async function doAll(saves: Save[], workbook: ExcelJS.Workbook, downloadedFilePath: string) { + const promises = saves.map(save => { + return (async function() { + const res = await save.parser(workbook, downloadedFilePath); + + if (typeof save.filePath === "string") { + save.filePath = [save.filePath]; + } + + await Promise.all( + save.filePath.map(filePath => fs.writeFile(filePath, JSON.stringify(res))) + ); + })() + }) + + await Promise.all(promises); +} + +export default async function parseAll(downloadedFilePath: string) { const workbook = new ExcelJS.Workbook(); await workbook.xlsx.readFile(downloadedFilePath); - //await parseV1V2(workbook); - await generateArchivedV1_V2(); - await parseV3(workbook, downloadedFilePath); + await doAll([ + { + filePath: ["volume/db/v1.json", "volume/db/v2.json"], + parser: generateArchivedV1_V2, + }, + { + filePath: "volume/db/v3.json", + parser: parseV3 + }, + ], workbook, downloadedFilePath); } -//parseThisShit("volume/db/current.xlsx") +if (process.argv[1] === fileURLToPath(import.meta.url)) { + await parseAll("volume/db/current.xlsx"); +} diff --git a/scrape/parse/archived/v1_v2.ts b/scrape/parse/archived/v1_v2.ts index 4a56578..5ab56f9 100644 --- a/scrape/parse/archived/v1_v2.ts +++ b/scrape/parse/archived/v1_v2.ts @@ -1,5 +1,3 @@ -import fs from "fs/promises" - const CLASSES: string[] = [ "A1a", "A1b", "A1c", "C1a", "C1b", "C1c", "A2", "C2a", "C2b", "C2c", "E2", "C3a", "C3b", "C3c", "E3" ]; @@ -73,8 +71,5 @@ export default async function generateArchivedV1_V2() { data.schedule.push(d); } - await Promise.all([ - fs.writeFile("volume/db/v1.json", JSON.stringify(data, null, 2)), - fs.writeFile("volume/db/v2.json", JSON.stringify(data, null, 2)), - ]); + return data; } diff --git a/scrape/parse/v1_v2.ts b/scrape/parse/v1_v2.ts deleted file mode 100644 index 344931c..0000000 --- a/scrape/parse/v1_v2.ts +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (C) 2025 Jakub Žitník - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -import { Workbook, Worksheet } from "exceljs" -import fs from "fs" -import parseAbsence, { AbsenceResult } from "../utils/parseAbsence.js" -import parseTeachers from "../utils/parseTeachers.js" - -interface DatedSheet { - sheet: Worksheet; - dateKey: string; -} - -interface ScheduleDay { - [key: string]: any; - ABSENCE?: AbsenceResult[]; -} - -export default async function parseV1V2(workbook: Workbook) { - const teacherMap = await parseTeachers(); - - const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek|po|út|ut|st|čt|ct|pa|pá)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}|\d{2})/i; - - // Get today's date for comparison - function getCurrentDateObject() { - const now = new Date(); - return new Date(now.getFullYear(), now.getMonth(), now.getDate()); - } - - const today = getCurrentDateObject(); - - const datedSheets: DatedSheet[] = []; - - for (const sheet of workbook.worksheets) { - const match = sheet.name.toLowerCase().match(dateRegex); - if (!match) continue; - - const day = parseInt(match[2], 10); - const month = parseInt(match[3], 10) - 1; - const year = match[4].length === 2 ? Number('20' + match[4]) : Number(match[4]); - - const sheetDate = new Date(year, month, day); - if (sheetDate < today) continue; - - const dateKey = `${year}-${month + 1}-${day}`; - - datedSheets.push({ - sheet, - dateKey, - }); - } - - const sheetsByDate: Record = {}; - for (const item of datedSheets) { - sheetsByDate[item.dateKey] ??= []; - sheetsByDate[item.dateKey].push(item.sheet); - } - - const upcomingSheets = Object.values(sheetsByDate).map((sheets) => { - if (sheets.length === 1) return sheets[0].name; - const found = sheets.find((s) => s.state !== "hidden"); - return (found ?? sheets[0]).name; - }); - - const final: ScheduleDay[] = []; - - let finalIndex = 0 - for (const key of upcomingSheets) { - const currentSheet = workbook.getWorksheet(key); - if (!currentSheet) continue; - - final.push({}); - - const regex = /[AEC][0-4][a-c]?\s*\/.*/s; - const prefixRegex = /[AEC][0-4][a-c]?/; - const classes: string[] = []; - const matchingKeys: string[] = []; - - currentSheet.eachRow((row) => { - row.eachCell((cell) => { - const cellAddress = cell.address; - const value = cell.value; - - if (value && typeof value === "string") { - const testResult = regex.test(value); - if (testResult && cellAddress.startsWith("A")) { - const prefixMatch = value.match(prefixRegex); - if (prefixMatch) { - const prefix = prefixMatch[0]; - classes.push(prefix); - } - matchingKeys.push(cellAddress); - } - } - }) - }) - - function letterToNumber(letter: string) { - return letter.toLowerCase().charCodeAt(0) - "a".charCodeAt(0); - } - - // For each class - let classI = 0; - for (const matchingKey of matchingKeys) { - const matchingCell = currentSheet.getCell(matchingKey); - const rowNumber = matchingCell.row; - const allKeys: string[] = []; - - // Get all cells in the same row - const row = currentSheet.getRow(Number(rowNumber)); - row.eachCell((cell) => { - if (cell.address !== matchingKey) { - allKeys.push(cell.address); - } - }) - - let final2: (string | null)[] = []; - - for (const key of allKeys) { - const cell = currentSheet.getCell(key); - const parsedKey = letterToNumber(key.replace(/[0-9]/gi, "")); - let d = true; - - try { - const regex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/; - const cellText = cell.text || ""; - // @ts-ignore - const fgColor = cell.fill?.fgColor; - if (regex.test(cellText.trim()) || cellText.trim().length == 0 || fgColor === undefined) { - d = false; - } - } catch {} - - if (d) { - let text = cell.text; - // @ts-ignore - if (cell.fill?.fgColor?.argb == "FFFFFF00") { - text += "\n(bude upřesněno)"; - } - final2[parsedKey] = text || ""; - } else { - final2[parsedKey] = null; - } - } - - const final2Array = Array.from(final2, (item) => (item === undefined ? null : item)); - while (final2Array.length < 10) { - final2Array.push(null); - } - - final[finalIndex][classes[classI]] = final2Array.slice(1, 11); - - classI++; - } - - // ABSENCE - final[finalIndex]["ABSENCE"] = []; - let absenceKey: string | null = null; - - currentSheet.eachRow((row) => { - row.eachCell((cell) => { - const value = (typeof cell.value === "string" ? cell.value : "").trim().toLowerCase(); - if (value === "absence") { - absenceKey = cell.address; - } - }) - }); - - if (absenceKey) { - const absenceCell = currentSheet.getCell(absenceKey); - const rowNumber = absenceCell.row; - const allAbsenceKeys: string[] = []; - - // Get all cells in the same row as absence - const row = currentSheet.getRow(Number(rowNumber)); - row.eachCell((cell) => { - if (cell.address !== absenceKey) { // absenceKey is checked above to be non-null - allAbsenceKeys.push(cell.address); - } - }) - - const absenceRange = new Set(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "L"]) - for (const absenceKeyCur of allAbsenceKeys) { - if (!absenceRange.has(absenceKeyCur.substring(0, 1))) { - break; - }; - - const cell = currentSheet.getCell(absenceKeyCur); - const value = (cell.value || "").toString().trim(); - if (value.length === 0) { - continue - } - - const data = parseAbsence(value, teacherMap); - if (final[finalIndex]["ABSENCE"]) { - final[finalIndex]["ABSENCE"]!.push(...data); - } - } - } - - finalIndex++; - } - - const currentDate = new Date(); - const formattedDate = currentDate.getHours().toString().padStart(2, "0") + ":" + currentDate.getMinutes().toString().padStart(2, "0"); - - const data = { - schedule: final, - props: upcomingSheets.map((str) => { - const dateMatch = str.match(/(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}|\d{2})/); - - let date = null; - - if (dateMatch) { - const day = Number.parseInt(dateMatch[1], 10); - const month = Number.parseInt(dateMatch[2], 10); - const year = dateMatch[3].length === 2 ? Number('20' + dateMatch[3]) : Number(dateMatch[3]); - - date = new Date(year, month - 1, day); - } - - const isPriprava = str - .toLowerCase() - .normalize("NFD") - .replace(/[\u0300-\u036f]/g, "") - .includes("priprava"); - - return { - date: date - ? `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, "0")}-${String(date.getDate()).padStart(2, "0")}` - : null, - priprava: isPriprava, - }; - }), - status: { - lastUpdated: formattedDate, - } - } - - fs.writeFileSync("volume/db/v2.json", JSON.stringify(data, null, 2)); - - // Modify the data for v1 - const copy = JSON.parse(JSON.stringify(data)); - - copy.schedule.forEach((day: ScheduleDay) => { - if (!Array.isArray(day.ABSENCE)) return; - - day.ABSENCE = day.ABSENCE.map((old: any) => { - if (old.type === "zastoupen") { - return { - type: "invalid", - teacher: null, - teacherCode: null, - hours: null, - original: `za ${old.teacherCode.toUpperCase()} zastupuje ${old.zastupuje.teacherCode.toUpperCase()}` - }; - } - return old; - }); - }); - - fs.writeFileSync("volume/db/v1.json", JSON.stringify(copy, null, 2)) -} - -//parseV1V2("db/current.xlsx") diff --git a/scrape/parse/v3.ts b/scrape/parse/v3.ts index 69d1ef3..7673d68 100644 --- a/scrape/parse/v3.ts +++ b/scrape/parse/v3.ts @@ -136,7 +136,7 @@ export default async function parseV3(workbook: Workbook, downloadedFilePath: st for (const { dateKey, sheet } of resolvedDays) { const ann = announcements[dateKey]; - const allFlags = ann.map(a => a.flags).flat(); + const allFlags = ann?.map(a => a.flags)?.flat() || []; const { changes, absence, inWork, takesPlace, reservedRooms } = extractDaySchedule(sheet, teacherMap, themeColors, allFlags); @@ -155,7 +155,7 @@ export default async function parseV3(workbook: Workbook, downloadedFilePath: st schedule, }; - fs.writeFileSync("volume/db/v3.json", JSON.stringify(data, null, 2)); + return data; } // @@ -238,7 +238,6 @@ function isPripravaSheet(name: string) { // function extractClassChanges(sheet: Worksheet, themeColors: ThemeColors | null, flags: Flag[]) { - console.log(flags) const ignoreColors = flags.includes(Flag.SHOW_ALL_ENTRIES) const classRegex = /[AEC][0-4][a-c]?\s*\/.*/s; const prefixRegex = /[AEC][0-4][a-c]?/; diff --git a/scrape/scraper.ts b/scrape/scraper.ts index 2bd1473..0c5ed03 100644 --- a/scrape/scraper.ts +++ b/scrape/scraper.ts @@ -15,7 +15,7 @@ import puppeteer, { Page, Browser } from 'puppeteer'; import path from 'path'; import fs from 'fs'; -import parseThisShit from './parse.js'; +import parseAll from './parse.js'; import 'dotenv/config'; const EMAIL = process.env.EMAIL; @@ -195,7 +195,7 @@ async function handleError(page: Page, err: any) { await fs.promises.cp(downloadedFilePath, path.join(DB_FOLDER, "current.xlsx")); - await parseThisShit(downloadedFilePath); + await parseAll(downloadedFilePath); await clearDownloadsFolder(); await browser.close(); diff --git a/viewer/app/view.tsx b/viewer/app/view.tsx index 51516b7..817a6d2 100644 --- a/viewer/app/view.tsx +++ b/viewer/app/view.tsx @@ -52,7 +52,6 @@ export default function View({ data }: ViewProps) { try { const text = await file.text(); const jsonData = JSON.parse(text); - console.log(jsonData) const foundKey = Object.keys(jsonData).find(k => k.toLowerCase() === classNameProcessed);