1
0
Files
jecnarozvrh/scrape/parse.js
jzitnik-dev 3b10ab09e7
All checks were successful
Remote Deploy / deploy (push) Successful in 3s
chore: Add trim
2025-09-04 08:29:01 +02:00

154 lines
4.4 KiB
JavaScript

import XLSX from "xlsx";
import fs from "fs";
import parseAbsence from "./utils/parseAbsence.js";
import parseTeachers from "./utils/parseTeachers.js";
/** Sheet names of interest start with a Czech weekday followed by "D. M. 20YY". */
const SHEET_DATE_REGEX = /^(pondělí|úterý|středa|čtvrtek|pátek)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i;
/** A class header cell contains a class code followed by a slash, e.g. "A1a / ...". */
const CLASS_CELL_REGEX = /[AEC][0-4][a-c]?\s*\/.*/s;
/** Extracts just the class code (e.g. "A1a") from a class header cell. */
const CLASS_PREFIX_REGEX = /[AEC][0-4][a-c]?/;
/** Cells of the form "úklid <number> <two letters>" are reported as empty (null). */
const CLEANUP_CELL_REGEX = /^úklid\s+\d+\s+[A-Za-z]{2}$/;
/** A1-style cell address; worksheet keys starting with "!" (e.g. "!ref") do not match. */
const CELL_ADDRESS_REGEX = /^([A-Z]+)(\d+)$/i;
/** Number of entries emitted for every class row. */
const HOURS_PER_DAY = 10;
/** Zero-based index of the first column that is exported for a class row (column C). */
const FIRST_HOUR_COLUMN = 2;
/** At most this many cells of the absence row are inspected (empty cells count too). */
const MAX_ABSENCE_CELLS = 10;

/**
 * Splits an A1-style address into a zero-based column index and its row label.
 *
 * @param {string} address - Worksheet key such as "C12" or "AA3".
 * @returns {{column: number, row: string} | null} Parsed address, or null for
 *   keys that are not cell addresses.
 */
function parseCellAddress(address) {
  const match = CELL_ADDRESS_REGEX.exec(address);
  if (!match) {
    return null;
  }
  // Base-26 decoding so multi-letter columns ("AA", "BA", ...) do not collide
  // with the single-letter ones.
  let column = 0;
  for (const letter of match[1].toUpperCase()) {
    column = column * 26 + (letter.charCodeAt(0) - "A".charCodeAt(0) + 1);
  }
  return { column: column - 1, row: match[2] };
}

/**
 * Lists every cell of a worksheet in the worksheet's own key order.
 *
 * @param {object} sheet - Worksheet object keyed by cell address.
 * @returns {Array<{address: string, column: number, row: string}>}
 */
function listCells(sheet) {
  const cells = [];
  for (const address of Object.keys(sheet)) {
    const parsed = parseCellAddress(address);
    if (parsed) {
      cells.push({ address, column: parsed.column, row: parsed.row });
    }
  }
  return cells;
}

/**
 * Groups cells by row label so a row can be looked up without rescanning the sheet.
 *
 * @param {Array<{address: string, column: number, row: string}>} cells
 * @returns {Map<string, Array<{address: string, column: number, row: string}>>}
 */
function groupCellsByRow(cells) {
  const rows = new Map();
  for (const cell of cells) {
    const bucket = rows.get(cell.row);
    if (bucket) {
      bucket.push(cell);
    } else {
      rows.set(cell.row, [cell]);
    }
  }
  return rows;
}

/**
 * Parses the date embedded in a sheet name.
 *
 * @param {string} sheetName
 * @returns {Date | null} Local-midnight date, or null when the name does not match.
 */
function parseSheetDate(sheetName) {
  const match = sheetName.match(SHEET_DATE_REGEX);
  if (!match) {
    return null;
  }
  const day = Number.parseInt(match[2], 10);
  const month = Number.parseInt(match[3], 10) - 1; // Date months are 0-indexed
  const year = Number.parseInt(match[4], 10);
  return new Date(year, month, day);
}

/**
 * Formats a date as "YYYY-MM-DD" using its local-time fields.
 *
 * @param {Date} date
 * @returns {string}
 */
function formatIsoDate(date) {
  const month = String(date.getMonth() + 1).padStart(2, "0");
  const day = String(date.getDate()).padStart(2, "0");
  return `${date.getFullYear()}-${month}-${day}`;
}

/**
 * Collects the formatted text of the cells that share a row with a class header.
 *
 * @param {object} sheet - Worksheet object keyed by cell address.
 * @param {Array<{address: string, column: number}>} rowCells - Cells of the header's row.
 * @param {string} headerAddress - Address of the class header cell (excluded).
 * @returns {Array<string | null>} Exactly HOURS_PER_DAY entries taken from columns
 *   C onwards; blank cells and cells matching CLEANUP_CELL_REGEX are null.
 */
function readClassHours(sheet, rowCells, headerAddress) {
  // Pre-filling guarantees a fixed length even when the row's trailing cells are missing.
  const hours = new Array(HOURS_PER_DAY).fill(null);
  for (const { address, column } of rowCells) {
    const slot = column - FIRST_HOUR_COLUMN;
    if (address === headerAddress || slot < 0 || slot >= HOURS_PER_DAY) {
      continue;
    }
    const text = sheet[address].w;
    if (typeof text !== "string") {
      continue; // no formatted text available for this cell
    }
    const trimmed = text.trim();
    if (trimmed.length === 0 || CLEANUP_CELL_REGEX.test(trimmed)) {
      continue;
    }
    // The untrimmed formatted text is stored; trimming is only used for the checks.
    hours[slot] = text;
  }
  return hours;
}

/**
 * Parses the cells that share a row with the cell whose value is "absence".
 *
 * @param {object} sheet - Worksheet object keyed by cell address.
 * @param {Array<{address: string, row: string}>} cells - All cells in key order.
 * @param {Map<string, Array<{address: string}>>} rows - Cells grouped by row.
 * @param {*} teacherMap - Value returned by parseTeachers(), forwarded to parseAbsence().
 * @returns {Array<*>} One parseAbsence() result per non-empty cell; empty when
 *   the sheet has no "absence" cell.
 */
function readAbsences(sheet, cells, rows, teacherMap) {
  const header = cells.find(({ address }) => {
    const value = sheet[address].v;
    return typeof value === "string" && value.trim().toLowerCase() === "absence";
  });
  if (!header) {
    return [];
  }
  const candidates = rows
    .get(header.row)
    .filter(({ address }) => address !== header.address)
    .slice(0, MAX_ABSENCE_CELLS);

  const absences = [];
  for (const { address } of candidates) {
    const raw = sheet[address].v;
    // Non-string values (numbers, missing values) are stringified instead of
    // crashing on .trim().
    const text = (typeof raw === "string" ? raw : String(raw == null ? "" : raw)).trim();
    if (text.length > 0) {
      absences.push(parseAbsence(text, teacherMap));
    }
  }
  return absences;
}

/**
 * Reads the downloaded workbook, extracts every sheet dated today or later and
 * writes the result to "db/current.json" as `{ schedule, props }`.
 *
 * `schedule[i]` maps each class code found on sheet i to its HOURS_PER_DAY
 * entries and holds the parsed absence row under "ABSENCE"; `props[i]` carries
 * the sheet's date ("YYYY-MM-DD") and whether its name contains "priprava"
 * (compared case-insensitively with diacritics removed).
 *
 * @param {string} downloadedFilePath - Path of the .xlsx file to parse.
 * @returns {Promise<void>}
 */
export default async function parseThisShit(downloadedFilePath) {
  const workbook = XLSX.readFile(downloadedFilePath);

  // Compare against local midnight so a sheet dated today still counts as upcoming.
  const now = new Date();
  const today = new Date(now.getFullYear(), now.getMonth(), now.getDate());

  const upcomingSheets = workbook.SheetNames
    .map((name) => ({ name, date: parseSheetDate(name) }))
    .filter(({ date }) => date !== null && date >= today);

  // Fetched once for all sheets, and only when there is something to parse.
  // NOTE(review): assumes parseTeachers() yields the same data on every call
  // within one run — confirm.
  const teacherMap = upcomingSheets.length > 0 ? await parseTeachers() : null;

  const schedule = upcomingSheets.map(({ name }) => {
    const sheet = workbook.Sheets[name];
    const cells = listCells(sheet);
    const rows = groupCellsByRow(cells);
    const day = {};

    for (const { address, row } of cells) {
      const value = sheet[address].v;
      if (typeof value !== "string" || !CLASS_CELL_REGEX.test(value)) {
        continue;
      }
      const classCode = value.match(CLASS_PREFIX_REGEX)[0];
      day[classCode] = readClassHours(sheet, rows.get(row), address);
    }

    day["ABSENCE"] = readAbsences(sheet, cells, rows, teacherMap);
    return day;
  });

  const props = upcomingSheets.map(({ name, date }) => ({
    date: formatIsoDate(date),
    priprava: name
      .toLowerCase()
      .normalize("NFD")
      .replace(/[\u0300-\u036f]/g, "")
      .includes("priprava"),
  }));

  fs.writeFileSync('db/current.json', JSON.stringify({ schedule, props }));
}
// Runs the parser on "downloads/table.xlsx" as soon as this module is evaluated,
// which includes every time the module is imported.
// NOTE(review): the returned promise is neither awaited nor given a .catch()
// handler, so a failure surfaces as an unhandled rejection — confirm that is intended.
parseThisShit("downloads/table.xlsx");