refactor: Rewrite to typescript
All checks were successful
Remote Deploy / deploy (push) Successful in 14s
All checks were successful
Remote Deploy / deploy (push) Successful in 14s
This commit is contained in:
281
scrape/parse/v1_v2.ts
Normal file
281
scrape/parse/v1_v2.ts
Normal file
@@ -0,0 +1,281 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Jakub Žitník
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
import ExcelJS, { Worksheet, Cell } from "exceljs"
|
||||
import fs from "fs"
|
||||
import parseAbsence, { AbsenceResult } from "../utils/parseAbsence.js"
|
||||
import parseTeachers from "../utils/parseTeachers.js"
|
||||
|
||||
/**
 * A worksheet paired with the calendar date parsed from its name.
 */
interface DatedSheet {
  /** Grouping key in the form `YYYY-M-D` (month and day are not zero-padded). */
  dateKey: string;
  /** The worksheet whose name produced `dateKey`. */
  sheet: Worksheet;
}
|
||||
|
||||
/**
 * Parsed contents of one worksheet.
 *
 * Keys other than `ABSENCE` are class prefixes matched in column A; each maps
 * to that row's slot array, where `null` marks a slot the parser left out.
 * The index signature lists the concrete value shapes instead of `any`, so
 * reads and writes on a day object stay type-checked.
 */
interface ScheduleDay {
  [key: string]: (string | null)[] | AbsenceResult[] | undefined;
  /** Entries parsed from the row labelled "absence", when present. */
  ABSENCE?: AbsenceResult[];
}
|
||||
|
||||
/** Sheet names that start with a Czech weekday (full or abbreviated) followed by `D. M. YYYY` or `D. M. YY`. */
const SHEET_DATE_REGEX = /^(pondělí|úterý|středa|čtvrtek|pátek|po|út|ut|st|čt|ct|pa|pá)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}|\d{2})/i;

/** First `D. M. YYYY` / `D. M. YY` date found anywhere in a sheet name. */
const LOOSE_DATE_REGEX = /(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}|\d{2})/;

/** Class label cell: class prefix, optional whitespace, a slash, then anything (including newlines). */
const CLASS_LABEL_REGEX = /[AEC][0-4][a-c]?\s*\/.*/s;

/** The class prefix on its own, e.g. `A1` or `E3b`. */
const CLASS_PREFIX_REGEX = /[AEC][0-4][a-c]?/;

/** Cells reading "úklid" (cleaning) plus an optional number and a two-letter code; these are left out of the schedule. */
const CLEANING_REGEX = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/;

/** Fill colour that marks a slot as not yet final. */
const PENDING_FILL_ARGB = "FFFFFF00";

/** Number of slots emitted per class row (columns B through K). */
const SLOT_COUNT = 10;

/**
 * First letters of the columns that are read from the absence row.
 * NOTE(review): "K" is absent while "L" is present; kept exactly as it was —
 * confirm whether this is intentional.
 */
const ABSENCE_COLUMNS = new Set(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "L"]);

/** Second argument accepted by `parseAbsence` (the value returned by `parseTeachers`). */
type TeacherMap = Parameters<typeof parseAbsence>[1];

/** Returns local midnight of the current day, so that today's sheet is not filtered out. */
function startOfToday(): Date {
  const now = new Date();
  return new Date(now.getFullYear(), now.getMonth(), now.getDate());
}

/** Expands a two-digit year to 20YY; four-digit years are used as they are. */
function expandYear(raw: string): number {
  return raw.length === 2 ? Number("20" + raw) : Number(raw);
}

/** Extracts the upper-cased column letters from a cell address (`"AB12"` -> `"AB"`). */
function columnLetters(address: string): string {
  return address.replace(/[^A-Za-z]/g, "").toUpperCase();
}

/**
 * Converts a cell address to a zero-based column index (A -> 0, Z -> 25, AA -> 26).
 * Returns -1 when the address contains no column letters.
 */
function columnIndex(address: string): number {
  let index = 0;
  for (const letter of columnLetters(address)) {
    index = index * 26 + (letter.charCodeAt(0) - "A".charCodeAt(0) + 1);
  }
  return index - 1;
}

/**
 * Reads a cell's display text without letting a failing getter abort the parse.
 * NOTE(review): `cell.text` is assumed to be able to throw for some cell kinds
 * (the previous code wrapped it in try/catch) — confirm against ExcelJS.
 */
function safeCellText(cell: Cell): string {
  try {
    return cell.text || "";
  } catch {
    return "";
  }
}

/** Returns the foreground colour of the cell's fill, or `undefined` when the fill has none. */
function fillForeground(cell: Cell): { argb?: string } | null | undefined {
  const fill = cell.fill;
  // Only some fill variants declare `fgColor`; the `in` check narrows to those.
  return fill && "fgColor" in fill ? fill.fgColor : undefined;
}

/**
 * Converts one cell of a class row into its slot value.
 *
 * Returns `null` for blank cells, cleaning-duty cells and cells without a
 * fill foreground colour; otherwise returns the cell text, with a
 * "(bude upřesněno)" ("to be specified") suffix when the cell has the pending fill.
 */
function readSlot(cell: Cell): string | null {
  const text = safeCellText(cell);
  const trimmed = text.trim();
  const foreground = fillForeground(cell);

  if (trimmed.length === 0 || CLEANING_REGEX.test(trimmed) || foreground === undefined) {
    return null;
  }
  return foreground?.argb === PENDING_FILL_ARGB ? text + "\n(bude upřesněno)" : text;
}

/**
 * Picks the worksheets to publish: one per date that is today or later.
 *
 * Sheets whose name does not match `SHEET_DATE_REGEX` are ignored. When several
 * sheets share a date, the first one that is not hidden wins; if all are
 * hidden, the first one is used. Dates keep the order of first appearance.
 */
function selectUpcomingSheets(worksheets: readonly Worksheet[], today: Date): Worksheet[] {
  const dated: DatedSheet[] = [];
  for (const sheet of worksheets) {
    const match = sheet.name.toLowerCase().match(SHEET_DATE_REGEX);
    if (!match) continue;

    const day = parseInt(match[2], 10);
    const month = parseInt(match[3], 10);
    const year = expandYear(match[4]);
    if (new Date(year, month - 1, day) < today) continue;

    dated.push({ sheet, dateKey: `${year}-${month}-${day}` });
  }

  const byDate = new Map<string, Worksheet[]>();
  for (const { sheet, dateKey } of dated) {
    const group = byDate.get(dateKey);
    if (group) {
      group.push(sheet);
    } else {
      byDate.set(dateKey, [sheet]);
    }
  }

  return Array.from(byDate.values(), (group) => group.find((s) => s.state !== "hidden") ?? group[0]);
}

/**
 * Builds the slot array for the row that contains `labelCell`.
 *
 * Every other non-empty cell of the row is placed at its column index, gaps
 * become `null`, and exactly `SLOT_COUNT` entries (columns B..K) are returned.
 */
function readClassRow(sheet: Worksheet, labelCell: Cell): (string | null)[] {
  const sparse: (string | null)[] = [];

  sheet.getRow(Number(labelCell.row)).eachCell((cell) => {
    if (cell.address === labelCell.address) return;
    const index = columnIndex(cell.address);
    if (index < 0) return;
    sparse[index] = readSlot(cell);
  });

  // Array.from visits holes as `undefined`, which turns the sparse array dense.
  const dense = Array.from(sparse, (slot) => slot ?? null);
  // Pad past index SLOT_COUNT so the slice below always yields SLOT_COUNT entries.
  while (dense.length <= SLOT_COUNT) {
    dense.push(null);
  }
  return dense.slice(1, SLOT_COUNT + 1);
}

/** Returns the last cell whose string value is "absence" (case-insensitive, trimmed), if any. */
function findAbsenceLabel(sheet: Worksheet): Cell | undefined {
  const hits: Cell[] = [];
  sheet.eachRow((row) => {
    row.eachCell((cell) => {
      const value = (typeof cell.value === "string" ? cell.value : "").trim().toLowerCase();
      if (value === "absence") hits.push(cell);
    });
  });
  return hits[hits.length - 1];
}

/**
 * Parses the cells that share a row with the "absence" label.
 *
 * Reading stops at the first cell whose address does not start with a letter
 * in `ABSENCE_COLUMNS`; blank values are skipped.
 */
function readAbsences(sheet: Worksheet, teacherMap: TeacherMap): AbsenceResult[] {
  const absences: AbsenceResult[] = [];
  const label = findAbsenceLabel(sheet);
  if (!label) return absences;

  const cells: Cell[] = [];
  sheet.getRow(Number(label.row)).eachCell((cell) => {
    if (cell.address !== label.address) cells.push(cell);
  });

  for (const cell of cells) {
    if (!ABSENCE_COLUMNS.has(cell.address.substring(0, 1))) break;

    const value = (cell.value || "").toString().trim();
    if (value.length === 0) continue;

    absences.push(...parseAbsence(value, teacherMap));
  }
  return absences;
}

/** Parses one worksheet into its class rows (keyed by class prefix) plus the `ABSENCE` list. */
function parseSheet(sheet: Worksheet, teacherMap: TeacherMap): ScheduleDay {
  const labelCells: Cell[] = [];
  sheet.eachRow((row) => {
    row.eachCell((cell) => {
      const value = cell.value;
      // Class labels live in column A only; comparing the full column name keeps AA..AZ out.
      if (typeof value === "string" && columnLetters(cell.address) === "A" && CLASS_LABEL_REGEX.test(value)) {
        labelCells.push(cell);
      }
    });
  });

  const day: ScheduleDay = {};
  for (const labelCell of labelCells) {
    const prefix = String(labelCell.value).match(CLASS_PREFIX_REGEX);
    if (!prefix) continue;
    day[prefix[0]] = readClassRow(sheet, labelCell);
  }
  day.ABSENCE = readAbsences(sheet, teacherMap);
  return day;
}

/**
 * Derives the per-day metadata from a sheet name: the date as zero-padded
 * `YYYY-MM-DD` (or `null` when the name holds no date) and whether the name
 * contains "priprava" once diacritics and case are ignored.
 */
function describeSheet(name: string): { date: string | null; priprava: boolean } {
  const match = name.match(LOOSE_DATE_REGEX);

  let date: string | null = null;
  if (match) {
    const parsed = new Date(
      expandYear(match[3]),
      Number.parseInt(match[2], 10) - 1,
      Number.parseInt(match[1], 10),
    );
    const month = String(parsed.getMonth() + 1).padStart(2, "0");
    const day = String(parsed.getDate()).padStart(2, "0");
    date = `${parsed.getFullYear()}-${month}-${day}`;
  }

  const priprava = name
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .includes("priprava");

  return { date, priprava };
}

/**
 * Parses the downloaded workbook and writes `db/v2.json` and `db/v1.json`.
 *
 * Only sheets dated today or later are published. The v1 file is the v2 data
 * with every "zastoupen" absence replaced by an "invalid" entry that carries a
 * textual description instead.
 *
 * @param downloadedFilePath Path of the `.xlsx` file to read.
 * @throws Whatever reading the workbook, loading the teacher map or writing
 *   the output files throws.
 */
export default async function parseV1V2(downloadedFilePath: string): Promise<void> {
  const workbook = new ExcelJS.Workbook();
  await workbook.xlsx.readFile(downloadedFilePath);
  const teacherMap = await parseTeachers();

  // Worksheet objects are carried through directly, so `schedule` and `props`
  // always have the same length and order.
  const upcomingSheets = selectUpcomingSheets(workbook.worksheets, startOfToday());
  const schedule = upcomingSheets.map((sheet) => parseSheet(sheet, teacherMap));

  const now = new Date();
  const lastUpdated = now.getHours().toString().padStart(2, "0") + ":" + now.getMinutes().toString().padStart(2, "0");

  const data = {
    schedule,
    props: upcomingSheets.map((sheet) => describeSheet(sheet.name)),
    status: {
      lastUpdated,
    },
  };

  fs.writeFileSync("db/v2.json", JSON.stringify(data, null, 2));

  // The v1 format is derived from exactly what was serialised for v2.
  const legacy: typeof data = JSON.parse(JSON.stringify(data));

  for (const day of legacy.schedule) {
    if (!Array.isArray(day.ABSENCE)) continue;

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the entry shape is declared in parseAbsence
    day.ABSENCE = day.ABSENCE.map((entry: any) => {
      if (entry.type !== "zastoupen") return entry;
      return {
        type: "invalid",
        teacher: null,
        teacherCode: null,
        hours: null,
        original: `za ${entry.teacherCode.toUpperCase()} zastupuje ${entry.zastupuje.teacherCode.toUpperCase()}`,
      };
    });
  }

  fs.writeFileSync("db/v1.json", JSON.stringify(legacy, null, 2));
}
|
||||
|
||||
//parseV1V2("db/current.xlsx")
|
||||
Reference in New Issue
Block a user