1
0
Files
jecnarozvrh/scrape/parse/v3.ts
jzitnik-dev ae17dc241a
All checks were successful
Remote Deploy / deploy (push) Successful in 14s
refactor: Rewrite to typescript
2026-02-11 08:20:56 +01:00

428 lines
12 KiB
TypeScript

/*
* Copyright (C) 2025 Jakub Žitník
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
import fs from "fs";
import parseAbsence from "../utils/parseAbsence.js"
import parseTeachers from "../utils/parseTeachers.js"
import ExcelJS, { Worksheet, Cell, Row } from "exceljs"
import JSZip from "jszip";
import { parseStringPromise } from "xml2js";
/**
 * Lookup table from a numeric theme color slot to a hex color string.
 * A slot holds null when getThemeColors could not read a color for it.
 */
interface ThemeColors {
[key: number]: string | null;
}
/**
 * One timetable cell as produced by parseLessonCell.
 */
interface Lesson {
// Trimmed display text of the cell.
text: string;
// "#"-prefixed fill color from resolveCellColor, or null when it could not be resolved.
backgroundColor: string | null;
// "#"-prefixed font color; left undefined when no background color was resolved.
foregroundColor?: string;
// Set to true only when the cell fill ARGB is "FFFFFF00"; otherwise left undefined.
willBeSpecified?: boolean;
}
/**
 * A worksheet paired with the calendar day parsed from its name.
 */
interface ResolvedDay {
// Day in "YYYY-MM-DD" form, as built by getUpcomingSheets.
dateKey: string;
// The worksheet whose name produced dateKey.
sheet: Worksheet;
}
/**
 * Read the workbook's theme color palette straight from the .xlsx archive.
 *
 * The file is opened as a zip, `xl/theme/theme1.xml` is parsed, and the ten
 * entries of its color scheme are returned keyed by the theme index that
 * cell colors use.
 *
 * @param filePath Path to the .xlsx file on disk.
 * @returns The palette, or null when the archive has no theme part or the
 *          theme has no color scheme. Individual slots are null when the
 *          scheme entry is missing or carries neither an sRGB nor a system
 *          color.
 */
async function getThemeColors(filePath: string): Promise<ThemeColors | null> {
  const data = fs.readFileSync(filePath);
  const zip = await JSZip.loadAsync(data);

  const themeFile = zip.file("xl/theme/theme1.xml");
  if (!themeFile) {
    return null;
  }

  const themeXml = await themeFile.async("text");
  const theme = await parseStringPromise(themeXml);
  const scheme = theme?.["a:theme"]?.["a:themeElements"]?.[0]?.["a:clrScheme"]?.[0];
  if (!scheme) return null;

  // Tolerates a missing scheme entry (node === undefined) instead of throwing.
  const getColor = (node: any): string | null =>
    node?.["a:srgbClr"]?.[0]?.$?.val ?? node?.["a:sysClr"]?.[0]?.$?.lastClr ?? null;

  const slot = (name: string): string | null => getColor(scheme[name]?.[0]);

  // The scheme XML lists dk1, lt1, dk2, lt2, but the `theme` index stored on
  // cell colors has each of those pairs swapped: 0 = lt1, 1 = dk1, 2 = lt2,
  // 3 = dk2. Accents follow in document order.
  const colors: ThemeColors = {
    0: slot("a:lt1"),
    1: slot("a:dk1"),
    2: slot("a:lt2"),
    3: slot("a:dk2"),
    4: slot("a:accent1"),
    5: slot("a:accent2"),
    6: slot("a:accent3"),
    7: slot("a:accent4"),
    8: slot("a:accent5"),
    9: slot("a:accent6"),
  };
  return colors;
}
/**
 * Lighten or darken a six-digit hex color by an Excel tint value.
 *
 * A positive tint moves each channel toward 255 by that fraction; a zero or
 * negative tint scales each channel by (1 + tint).
 *
 * @param hex  Base color as "RRGGBB" (no leading "#").
 * @param tint Tint factor; defaults to 0 (no change).
 * @returns The tinted color as upper-case "RRGGBB".
 */
function applyTintToHex(hex: string, tint: number = 0) {
  const lighten = tint > 0;
  let tinted = "";
  for (const offset of [0, 2, 4]) {
    const channel = parseInt(hex.slice(offset, offset + 2), 16);
    const adjusted = lighten
      ? Math.round(channel + (255 - channel) * tint)
      : Math.round(channel * (1 + tint));
    tinted += adjusted.toString(16).padStart(2, "0");
  }
  return tinted.toUpperCase();
}
/**
 * Work out the final "#"-prefixed fill color of a cell.
 *
 * An explicit ARGB value wins and is returned as "#" + ARGB. Otherwise a
 * theme reference is looked up in themeColors and tinted. Anything else
 * (no fill color, no palette, unknown theme slot) yields null.
 *
 * @param cell        Cell whose fill foreground color is inspected.
 * @param themeColors Workbook palette, or null when unavailable.
 * @returns The color string or null.
 */
function resolveCellColor(cell: Cell, themeColors: ThemeColors | null) {
  // @ts-ignore
  const fillColor = cell.fill?.fgColor;
  if (!fillColor) return null;

  if (fillColor.argb) return `#${fillColor.argb}`;

  if (fillColor.theme === undefined || !themeColors) return null;

  const baseHex = themeColors[fillColor.theme];
  return baseHex ? `#${applyTintToHex(baseHex, fillColor.tint ?? 0)}` : null;
}
/**
 * Parse the downloaded workbook and write the v3 schedule to "db/v3.json".
 *
 * For every upcoming day sheet the class changes, absences, "takes place"
 * notes and reserved rooms are extracted and stored under the day's date
 * key, together with a status block carrying the current HH:MM time.
 *
 * @param downloadedFilePath Path of the .xlsx file to parse.
 */
export default async function parseV3(downloadedFilePath: string) {
  const workbook = new ExcelJS.Workbook();
  await workbook.xlsx.readFile(downloadedFilePath);

  const themeColors = await getThemeColors(downloadedFilePath);
  const teacherMap = await parseTeachers();

  const schedule: any = {};
  groupSheetsByDate(getUpcomingSheets(workbook)).forEach(({ dateKey, sheet }) => {
    const day = extractDaySchedule(sheet, teacherMap, themeColors);
    schedule[dateKey] = {
      info: { inWork: day.inWork },
      changes: day.changes,
      absence: day.absence,
      takesPlace: day.takesPlace,
      reservedRooms: day.reservedRooms,
    };
  });

  const payload = JSON.stringify(
    { status: { lastUpdated: formatNowTime() }, schedule },
    null,
    2,
  );
  fs.writeFileSync("db/v3.json", payload);
}
//
// ────────────────────────────────────────────────────────────
// SHEET FILTERING
// ────────────────────────────────────────────────────────────
//
/**
 * Select the worksheets whose name starts with a Czech weekday followed by a
 * date ("po 5.1.2026", "úterý 6. 1. 26", ...) that is today or later.
 *
 * @param workbook Workbook whose worksheets are scanned.
 * @param now      Reference instant for "today"; defaults to the current
 *                 time. Only its local calendar date is used.
 * @returns One entry per matching sheet, in workbook order, with the date
 *          formatted as "YYYY-MM-DD".
 */
function getUpcomingSheets(workbook: ExcelJS.Workbook, now: Date = new Date()): ResolvedDay[] {
  const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek|po|út|ut|st|čt|ct|pa|pá)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}|\d{2})/i;
  const todayMidnight = new Date(now.getFullYear(), now.getMonth(), now.getDate());
  const result: ResolvedDay[] = [];

  for (const sheet of workbook.worksheets) {
    const match = sheet.name.toLowerCase().match(dateRegex);
    if (!match) continue;

    const day = Number(match[2]);
    const month = Number(match[3]) - 1;
    // Two-digit years are taken to be in the 2000s.
    const year = match[4].length === 2 ? Number("20" + match[4]) : Number(match[4]);

    const sheetDate = new Date(year, month, day);
    // Date silently rolls impossible dates over (31.2. becomes 3.3.), which
    // would otherwise leak a non-existent day into the date key.
    const isRealDate =
      sheetDate.getFullYear() === year &&
      sheetDate.getMonth() === month &&
      sheetDate.getDate() === day;
    if (!isRealDate) continue;

    if (sheetDate < todayMidnight) continue;

    const dateKey = `${year}-${String(month + 1).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
    result.push({ dateKey, sheet });
  }

  return result;
}
/**
 * Collapse sheets that share a date key down to a single sheet per day.
 *
 * When several sheets map to the same day, the first visible one is used.
 * Both "hidden" and "veryHidden" count as not visible. If none is visible,
 * the first sheet for that day is used.
 *
 * @param items Sheets with their date keys, in workbook order.
 * @returns One entry per distinct date key, in first-seen order.
 */
function groupSheetsByDate(items: ResolvedDay[]): ResolvedDay[] {
  const sheetsByDate = new Map<string, Worksheet[]>();
  for (const { dateKey, sheet } of items) {
    const bucket = sheetsByDate.get(dateKey);
    if (bucket) {
      bucket.push(sheet);
    } else {
      sheetsByDate.set(dateKey, [sheet]);
    }
  }

  const isVisible = (s: Worksheet) => s.state !== "hidden" && s.state !== "veryHidden";

  return Array.from(sheetsByDate, ([dateKey, sheets]) => ({
    dateKey,
    sheet: sheets.find(isVisible) ?? sheets[0],
  }));
}
//
// ────────────────────────────────────────────────────────────
// DAY PARSING
// ────────────────────────────────────────────────────────────
//
/**
 * Gather everything the output needs for one day sheet.
 *
 * @param sheet       Worksheet of the day.
 * @param teacherMap  Teacher lookup passed through to absence parsing.
 * @param themeColors Workbook palette used to resolve cell colors, or null.
 * @returns Class changes, absences, the "in work" flag derived from the
 *          sheet name, the "takes place" text and the reserved rooms.
 */
function extractDaySchedule(sheet: Worksheet, teacherMap: Record<string, string>, themeColors: ThemeColors | null) {
  const changes = extractClassChanges(sheet, themeColors);
  const absence = extractAbsence(sheet, teacherMap);
  const inWork = isPripravaSheet(sheet.name.toLowerCase());
  const takesPlace = extractTakesPlace(sheet);
  const reservedRooms = extractReservedRooms(sheet);

  return { changes, absence, inWork, takesPlace, reservedRooms };
}
/**
 * Tell whether a sheet name contains the word "priprava", ignoring letter
 * case and diacritics.
 *
 * @param name Sheet name to inspect.
 * @returns true when the normalized name includes "priprava".
 */
function isPripravaSheet(name: string) {
  const combiningMarks = /[\u0300-\u036f]/g;
  // Decompose accented letters, then drop the accents.
  const plainName = name.toLowerCase().normalize("NFD").replace(combiningMarks, "");
  return plainName.indexOf("priprava") !== -1;
}
//
// ────────────────────────────────────────────────────────────
// CLASS CHANGES
// ────────────────────────────────────────────────────────────
//
/**
 * Collect the lesson row of every class listed in column A of the sheet.
 *
 * A class row is one whose column-A cell holds a string matching
 * "<class> / ..." where the class looks like A1, E2b, C4a. The class prefix
 * becomes the key; the value is the lesson array built from the rest of the
 * row. If a class occurs more than once, the last row wins.
 *
 * @param sheet       Worksheet of the day.
 * @param themeColors Workbook palette used to resolve cell colors, or null.
 * @returns Map from class name to its lesson slots.
 */
function extractClassChanges(sheet: Worksheet, themeColors: ThemeColors | null) {
  const classRegex = /[AEC][0-4][a-c]?\s*\/.*/s;
  const prefixRegex = /[AEC][0-4][a-c]?/;
  // Exactly column A; a plain startsWith("A") would also accept AA..AZ.
  const columnARegex = /^A\d+$/;

  const classRows: { className: string; row: Row; address: string }[] = [];
  sheet.eachRow((row) => {
    row.eachCell((cell) => {
      const value = cell.value;
      if (typeof value !== "string") return;
      if (!columnARegex.test(cell.address) || !classRegex.test(value)) return;

      const prefixMatch = value.match(prefixRegex);
      if (!prefixMatch) return;
      classRows.push({ className: prefixMatch[0], row, address: cell.address });
    });
  });

  // Rows are processed only after the scan so the sheet is not iterated
  // while lesson cells are being read.
  const changes: Record<string, (Lesson | null)[]> = {};
  for (const { className, row, address } of classRows) {
    changes[className] = buildLessonArray(row, address, themeColors);
  }
  return changes;
}
/**
 * Turn one class row into a fixed list of ten lesson slots.
 *
 * Each visited cell is parsed and stored under the index of its column
 * letter (A = 0, B = 1, ...). The returned array holds indexes 1 through 10,
 * with null wherever no lesson was stored.
 *
 * @param row           Row belonging to a class.
 * @param ignoreAddress Address of the class label cell, which is skipped.
 * @param themeColors   Workbook palette used to resolve cell colors, or null.
 * @returns Exactly ten entries, each a Lesson or null.
 */
function buildLessonArray(row: Row, ignoreAddress: string, themeColors: ThemeColors | null) {
  const byColumn: (Lesson | null)[] = [];

  row.eachCell((cell) => {
    if (cell.address !== ignoreAddress) {
      const columnLetters = cell.address.replace(/[0-9]/g, "");
      byColumn[letterToNumber(columnLetters)] = parseLessonCell(cell, themeColors);
    }
  });

  const lessons: (Lesson | null)[] = [];
  for (let slot = 1; slot <= 10; slot++) {
    lessons.push(byColumn[slot] ?? null);
  }
  return lessons;
}
/**
 * Convert a worksheet cell into a Lesson, or null when it carries no lesson.
 *
 * A cell is ignored when its text is empty, when it is a cleaning-duty note
 * ("úklid ..."), or when it has no fill foreground color. Any error raised
 * while reading the cell also results in null.
 *
 * @param cell        Cell to convert.
 * @param themeColors Workbook palette used to resolve the fill color, or null.
 * @returns The lesson description or null.
 */
function parseLessonCell(cell: Cell, themeColors: ThemeColors | null): Lesson | null {
  const cleanupRegex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/;

  try {
    const label = (cell.text || "").trim();
    if (!label) return null;
    if (cleanupRegex.test(label)) return null;
    // @ts-ignore
    if (!cell.fill?.fgColor) return null;

    const backgroundColor = resolveCellColor(cell, themeColors);

    // A font color is only reported alongside a resolved background.
    let foregroundColor: string | undefined;
    if (backgroundColor) {
      const fontArgb = cell.font?.color?.argb;
      foregroundColor = fontArgb === undefined ? "#FF000000" : `#${fontArgb}`;
    }

    // @ts-ignore
    const isYellowFill = cell.fill.fgColor.argb === "FFFFFF00";

    return {
      text: label,
      backgroundColor,
      foregroundColor,
      willBeSpecified: isYellowFill || undefined,
    };
  } catch {
    return null;
  }
}
/**
 * Read the free-text notes block that starts at row 4.
 *
 * Nothing is read unless cell B4 is part of a merged range. From row 4
 * downward, the first string of at least 20 characters found in columns A-K
 * of each row is collected. Row 4 itself may lack such text; after it,
 * the first row without one ends the scan.
 *
 * @param sheet Worksheet of the day.
 * @returns The collected texts joined by newlines, or "" when there are none.
 */
function extractTakesPlace(sheet: Worksheet) {
  if (!sheet.getCell("B4").isMerged) return "";

  const firstRow = 4;
  const minLength = 20;
  const columns = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"];
  const notes: string[] = [];

  for (let rowNumber = firstRow; ; rowNumber++) {
    let longText = "";
    for (const column of columns) {
      const raw = sheet.getCell(`${column}${rowNumber}`)?.value;
      const candidate = typeof raw === "string" ? raw.trim() : "";
      if (candidate.length >= minLength) {
        longText = candidate;
        break;
      }
    }

    if (longText) {
      notes.push(longText);
    } else if (rowNumber !== firstRow) {
      break;
    }
  }

  return notes.map((note) => `\n${note}`).join("").trim();
}
/**
 * Read the room reservations from every row labelled "rezervace" in column A.
 *
 * Each such row yields at least ten slots, one per column starting at
 * column B. A value is placed at the position of its column, so an empty
 * cell in the middle of the row leaves a null in that slot instead of
 * shifting later values forward. Row iteration skips empty cells, which is
 * why the position has to come from the column number and not from the
 * visit order. Slots from several reservation rows are appended one after
 * another.
 *
 * @param sheet Worksheet of the day.
 * @returns At least ten entries, each a trimmed cell value or null.
 */
function extractReservedRooms(sheet: Worksheet) {
  const slotCount = 10;
  const firstSlotColumn = 2; // column B

  const reservationRows: Row[] = [];
  sheet.eachRow((row) => {
    row.eachCell((cell, colNumber) => {
      const value = cell.value;
      if (colNumber === 1 && typeof value === "string" && value.trim() === "rezervace") {
        reservationRows.push(row);
      }
    });
  });

  const result: (string | null)[] = [];
  for (const row of reservationRows) {
    const slots: (string | null)[] = new Array(slotCount).fill(null);
    row.eachCell((cell, colNumber) => {
      if (colNumber < firstSlotColumn) return; // the label cell itself
      const val = cell.value?.toString().trim();
      slots[colNumber - firstSlotColumn] = val ? val : null;
    });
    // A value beyond the tenth slot can leave holes before it; make them null.
    result.push(...Array.from(slots, (slot) => slot ?? null));
  }

  while (result.length < slotCount) {
    result.push(null);
  }
  return result;
}
//
// ────────────────────────────────────────────────────────────
// ABSENCE
// ────────────────────────────────────────────────────────────
//
/**
 * Parse the absence entries from the row that holds the "absence" label.
 *
 * The label is matched case-insensitively after trimming; if it occurs more
 * than once, the last occurrence is used. Every other non-empty cell of that
 * row whose address starts with one of the allowed column letters is handed
 * to parseAbsence, and all results are concatenated.
 *
 * @param sheet      Worksheet of the day.
 * @param teacherMap Teacher lookup passed through to parseAbsence.
 * @returns The parsed absence entries, or [] when no label is found.
 */
function extractAbsence(sheet: Worksheet, teacherMap: Record<string, string>) {
  let labelAddress: string | null = null;
  sheet.eachRow((row) =>
    row.eachCell((cell) => {
      const label = (cell.value || "").toString().trim().toLowerCase();
      if (label === "absence") labelAddress = cell.address;
    }),
  );
  if (!labelAddress) return [];

  // NOTE(review): the set has "L" but no "K" — confirm whether that is intended.
  const allowedColumns = new Set(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "L"]);
  const labelRow = sheet.getRow(Number(sheet.getCell(labelAddress).row));

  const entries: any[] = [];
  labelRow.eachCell((cell) => {
    const isLabel = cell.address === labelAddress;
    const inRange = allowedColumns.has(cell.address.charAt(0));
    if (isLabel || !inRange) return;

    const text = (cell.value || "").toString().trim();
    if (text) entries.push(...parseAbsence(text, teacherMap));
  });
  return entries;
}
//
// ────────────────────────────────────────────────────────────
// UTILS
// ────────────────────────────────────────────────────────────
//
/**
 * Convert spreadsheet column letters to a zero-based column index
 * (A = 0, B = 1, ..., Z = 25, AA = 26, AB = 27, ...).
 *
 * All letters are taken into account, so multi-letter columns no longer
 * collapse onto the index of their first letter.
 *
 * @param letter Column letters in either case.
 * @returns The zero-based index, or NaN for an empty string or any
 *          character outside A-Z.
 */
function letterToNumber(letter: string) {
  if (letter.length === 0) return NaN;

  let oneBased = 0;
  for (const ch of letter.toUpperCase()) {
    const digit = ch.charCodeAt(0) - 64; // "A" is char code 65
    if (digit < 1 || digit > 26) return NaN;
    oneBased = oneBased * 26 + digit;
  }
  return oneBased - 1;
}
/**
 * Format the current local time as zero-padded "HH:MM".
 *
 * @returns The time string, e.g. "07:03".
 */
function formatNowTime() {
  const current = new Date();
  const [hours, minutes] = [current.getHours(), current.getMinutes()].map((part) =>
    part.toString().padStart(2, "0"),
  );
  return `${hours}:${minutes}`;
}
// Run the parser on the downloaded workbook as soon as this module is loaded.
// The rejection handler reports a failed run and marks the process as failed
// instead of leaving the promise rejection unhandled.
parseV3("db/current.xlsx").catch((error: unknown) => {
  console.error("Failed to parse db/current.xlsx:", error);
  process.exitCode = 1;
});