1
0

refactor: Rewrite to typescript
All checks were successful
Remote Deploy / deploy (push) Successful in 14s

This commit is contained in:
2026-02-11 08:20:56 +01:00
parent 138fa17e54
commit ae17dc241a
14 changed files with 941 additions and 121 deletions

281
scrape/parse/v1_v2.ts Normal file
View File

@@ -0,0 +1,281 @@
/*
* Copyright (C) 2025 Jakub Žitník
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
import ExcelJS, { Worksheet, Cell } from "exceljs"
import fs from "fs"
import parseAbsence, { AbsenceResult } from "../utils/parseAbsence.js"
import parseTeachers from "../utils/parseTeachers.js"
/**
 * A worksheet paired with the calendar day parsed from its name.
 * Used by `parseV1V2` to group several sheets that describe the same day.
 */
interface DatedSheet {
// The ExcelJS worksheet whose name matched the weekday + date pattern.
sheet: Worksheet;
// Un-padded `year-month-day` string (e.g. `2026-2-11`) built from the sheet name; used only as a grouping key.
dateKey: string;
}
/**
 * Parsed content of one worksheet (one school day) as written to the JSON output.
 */
interface ScheduleDay {
// Any other key is a class identifier; `parseV1V2` assigns each one an array of `string | null` lesson entries.
[key: string]: any;
// Absence records collected from the sheet's "absence" row via `parseAbsence`.
ABSENCE?: AbsenceResult[];
}
/** Sheet name starting with a Czech weekday (full or abbreviated) followed by `D. M. YYYY` or `D. M. YY`. */
const SHEET_NAME_DATE_REGEX = /^(pondělí|úterý|středa|čtvrtek|pátek|po|út|ut|st|čt|ct|pa|pá)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}|\d{2})/i;
/** A `D. M. YYYY` / `D. M. YY` date anywhere inside a sheet name. */
const ANY_DATE_REGEX = /(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}|\d{2})/;
/** First-column cell that labels a class row, e.g. `A1b / ...`. */
const CLASS_ROW_REGEX = /[AEC][0-4][a-c]?\s*\/.*/s;
/** The class identifier at the start of such a label. */
const CLASS_PREFIX_REGEX = /[AEC][0-4][a-c]?/;
/** Cleaning-duty cells (`úklid [number] XY`); they are not reported as lessons. */
const CLEANING_DUTY_REGEX = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/;
/** Fill colour (ARGB) that marks a lesson cell as "to be specified". */
const TENTATIVE_FILL_ARGB = "FFFFFF00";
/**
 * 1-based columns scanned for absence entries: A..J and L.
 * NOTE(review): K (11) is not in the list although L (12) is — kept exactly as the
 * original column set; confirm whether K was left out on purpose.
 */
const ABSENCE_COLUMNS: ReadonlySet<number> = new Set([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12]);

/** The few cell members the helpers below need; an ExcelJS cell satisfies this structurally. */
interface CellLike {
  readonly address: string;
  readonly text: string;
  readonly fill?: unknown;
}

/** Expands a two-digit year to 20YY; four-digit years are taken as they are. */
function expandYear(digits: string): number {
  return digits.length === 2 ? Number("20" + digits) : Number(digits);
}

/**
 * Parses the weekday + date prefix of a sheet name.
 *
 * @returns the local-midnight date and the un-padded `year-month-day` grouping key,
 *          or `null` when the name does not start with a weekday and a date.
 */
function parseSheetDate(sheetName: string): { date: Date; dateKey: string } | null {
  const match = sheetName.toLowerCase().match(SHEET_NAME_DATE_REGEX);
  if (!match) return null;
  const day = parseInt(match[2], 10);
  const monthIndex = parseInt(match[3], 10) - 1;
  const year = expandYear(match[4]);
  return { date: new Date(year, monthIndex, day), dateKey: `${year}-${monthIndex + 1}-${day}` };
}

/** Builds the `props` entry of a sheet: zero-padded ISO-like date (or `null`) and the "priprava" flag. */
function describeSheet(sheetName: string): { date: string | null; priprava: boolean } {
  const match = sheetName.match(ANY_DATE_REGEX);
  let date: string | null = null;
  if (match) {
    // Going through Date keeps the original normalisation of out-of-range days/months.
    const parsed = new Date(expandYear(match[3]), Number.parseInt(match[2], 10) - 1, Number.parseInt(match[1], 10));
    const month = String(parsed.getMonth() + 1).padStart(2, "0");
    const day = String(parsed.getDate()).padStart(2, "0");
    date = `${parsed.getFullYear()}-${month}-${day}`;
  }
  // Strip diacritics so that "příprava" and "priprava" are both recognised.
  const priprava = sheetName
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .includes("priprava");
  return { date, priprava };
}

/**
 * Reads `cell.text` defensively.
 *
 * @returns the text (empty string when falsy), or `null` when reading it throws.
 */
function readCellText(cell: CellLike): string | null {
  try {
    return cell.text || "";
  } catch (error) {
    // One unreadable cell must not abort the whole parse; report it and treat it as empty.
    console.warn(`parseV1V2: could not read text of cell ${cell.address}`, error);
    return null;
  }
}

/** Returns the fill's `fgColor` (possibly `undefined`) without relying on the fill's declared type. */
function foregroundColor(fill: unknown): { argb?: unknown } | null | undefined {
  if (typeof fill !== "object" || fill === null || !("fgColor" in fill)) return undefined;
  return (fill as { fgColor?: { argb?: unknown } | null }).fgColor;
}

/**
 * Converts one cell of a class row into its lesson text.
 *
 * @returns `null` for cells that carry no change (blank text, no foreground fill colour,
 *          cleaning-duty note, unreadable text); otherwise the untrimmed cell text, with
 *          "\n(bude upřesněno)" appended when the cell uses the tentative fill colour.
 */
function lessonTextFromCell(cell: CellLike): string | null {
  const text = readCellText(cell);
  if (text === null) return null;
  const trimmed = text.trim();
  const fgColor = foregroundColor(cell.fill);
  if (trimmed.length === 0 || fgColor === undefined || CLEANING_DUTY_REGEX.test(trimmed)) {
    return null;
  }
  return fgColor?.argb === TENTATIVE_FILL_ARGB ? text + "\n(bude upřesněno)" : text;
}

/**
 * Rewrites a v2 absence record for the v1 output: "zastoupen" entries become an
 * "invalid" record that only carries a human-readable `original` string.
 * The input comes from `JSON.parse`, hence the loose typing.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function downgradeAbsenceForV1(entry: any): any {
  if (entry.type !== "zastoupen") return entry;
  return {
    type: "invalid",
    teacher: null,
    teacherCode: null,
    hours: null,
    original: `za ${entry.teacherCode.toUpperCase()} zastupuje ${entry.zastupuje.teacherCode.toUpperCase()}`
  };
}

/**
 * Parses the downloaded substitution workbook and writes `db/v2.json` and `db/v1.json`.
 *
 * Only sheets whose name starts with a weekday + date that is today or later are used.
 * When several sheets describe the same day, the first non-hidden one wins (falling back
 * to the first). For every selected sheet the result holds one entry per class row
 * (keyed by the class identifier) plus an `ABSENCE` list.
 *
 * @param downloadedFilePath path of the `.xlsx` file to read
 * @throws whatever ExcelJS, the teacher/absence parsers or the file writes throw
 */
export default async function parseV1V2(downloadedFilePath: string): Promise<void> {
  const workbook = new ExcelJS.Workbook();
  await workbook.xlsx.readFile(downloadedFilePath);
  const teacherMap = await parseTeachers();

  // Local midnight, so that today's sheet is still considered upcoming.
  const now = new Date();
  const today = new Date(now.getFullYear(), now.getMonth(), now.getDate());

  const datedSheets: DatedSheet[] = [];
  for (const sheet of workbook.worksheets) {
    const parsed = parseSheetDate(sheet.name);
    if (parsed === null || parsed.date < today) continue;
    datedSheets.push({ sheet, dateKey: parsed.dateKey });
  }

  // Group by day in workbook order (Map keeps insertion order).
  const sheetsByDate = new Map<string, Worksheet[]>();
  for (const { sheet, dateKey } of datedSheets) {
    const group = sheetsByDate.get(dateKey);
    if (group) {
      group.push(sheet);
    } else {
      sheetsByDate.set(dateKey, [sheet]);
    }
  }

  // Keep the worksheet objects themselves so `schedule` and `props` can never get out of step.
  const upcomingSheets = Array.from(sheetsByDate.values(), (sheets) =>
    sheets.length === 1 ? sheets[0] : (sheets.find((s) => s.state !== "hidden") ?? sheets[0])
  );

  const schedule: ScheduleDay[] = [];
  for (const sheet of upcomingSheets) {
    const day: ScheduleDay = {};

    // Single pass over the sheet: find class rows (label in column A) and the "absence" marker.
    const classRows: { className: string; rowNumber: number }[] = [];
    const absenceMarkers: { address: string; rowNumber: number }[] = [];
    sheet.eachRow((row, rowNumber) => {
      row.eachCell((cell, colNumber) => {
        const value = cell.value;
        if (typeof value !== "string") return;
        // Use the real column number: an address check like startsWith("A") also matches AA, AB, ...
        if (colNumber === 1 && CLASS_ROW_REGEX.test(value)) {
          const prefix = value.match(CLASS_PREFIX_REGEX);
          if (prefix) classRows.push({ className: prefix[0], rowNumber });
        }
        if (value.trim().toLowerCase() === "absence") {
          absenceMarkers.push({ address: cell.address, rowNumber });
        }
      });
    });

    for (const { className, rowNumber } of classRows) {
      // Sparse array indexed by zero-based column; index 0 (the class label) stays empty.
      const lessons: (string | null)[] = [];
      sheet.getRow(rowNumber).eachCell((cell, colNumber) => {
        if (colNumber === 1) return;
        lessons[colNumber - 1] = lessonTextFromCell(cell);
      });
      const dense = Array.from(lessons, (item) => item ?? null);
      // NOTE(review): padding to 10 and slicing 1..10 yields 9 entries for short rows and 10
      // when column K or beyond is populated — preserved as-is; confirm the intended length.
      while (dense.length < 10) {
        dense.push(null);
      }
      day[className] = dense.slice(1, 11);
    }

    // ABSENCE is assigned after the class keys so the JSON key order stays the same.
    const absences: NonNullable<ScheduleDay["ABSENCE"]> = [];
    day.ABSENCE = absences;
    // When the marker occurs more than once, the last occurrence wins.
    const marker = absenceMarkers[absenceMarkers.length - 1];
    if (marker !== undefined) {
      let pastRange = false;
      sheet.getRow(marker.rowNumber).eachCell((cell, colNumber) => {
        if (pastRange || cell.address === marker.address) return;
        if (!ABSENCE_COLUMNS.has(colNumber)) {
          // The first populated cell outside the range ends the scan of this row.
          pastRange = true;
          return;
        }
        if (!cell.value) return;
        // cell.text flattens rich-text/formula values that value.toString() would turn into "[object Object]".
        const text = (readCellText(cell) ?? "").trim();
        if (text.length === 0) return;
        absences.push(...parseAbsence(text, teacherMap));
      });
    }

    schedule.push(day);
  }

  const finishedAt = new Date();
  const lastUpdated =
    finishedAt.getHours().toString().padStart(2, "0") + ":" + finishedAt.getMinutes().toString().padStart(2, "0");
  const data = {
    schedule,
    props: upcomingSheets.map((sheet) => describeSheet(sheet.name)),
    status: {
      lastUpdated,
    }
  };
  fs.writeFileSync("db/v2.json", JSON.stringify(data, null, 2));

  // v1 clients do not know the "zastoupen" record type; work on a deep copy of the v2 payload.
  const v1 = JSON.parse(JSON.stringify(data));
  v1.schedule.forEach((day: ScheduleDay) => {
    if (!Array.isArray(day.ABSENCE)) return;
    day.ABSENCE = day.ABSENCE.map(downgradeAbsenceForV1);
  });
  fs.writeFileSync("db/v1.json", JSON.stringify(v1, null, 2));
}
//parseV1V2("db/current.xlsx")