diff --git a/scrape/parse.js b/scrape/parse.js index 026ebf3..8641abd 100644 --- a/scrape/parse.js +++ b/scrape/parse.js @@ -4,96 +4,96 @@ import parseAbsence from "./utils/parseAbsence.js" import parseTeachers from "./utils/parseTeachers.js" export default async function parseThisShit(downloadedFilePath) { - const workbook = new ExcelJS.Workbook() - await workbook.xlsx.readFile(downloadedFilePath) + const workbook = new ExcelJS.Workbook(); + await workbook.xlsx.readFile(downloadedFilePath); - const sheetNames = workbook.worksheets.map((sheet) => sheet.name) + const sheetNames = workbook.worksheets.map((sheet) => sheet.name); - const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i + const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek|po|út|ut|st|čt|ct|pa|pá)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i; // Get today's date for comparison function getCurrentDateObject() { - const now = new Date() - return new Date(now.getFullYear(), now.getMonth(), now.getDate()) + const now = new Date(); + return new Date(now.getFullYear(), now.getMonth(), now.getDate()); } - const today = getCurrentDateObject() + const today = getCurrentDateObject(); const upcomingSheets = sheetNames.filter((name) => { - const match = name.match(dateRegex) - if (!match) return false + const match = name.match(dateRegex); + if (!match) return false; - const day = Number.parseInt(match[2], 10) - const month = Number.parseInt(match[3], 10) - 1 // JavaScript months are 0-indexed - const year = Number.parseInt(match[4], 10) + const day = Number.parseInt(match[2], 10); + const month = Number.parseInt(match[3], 10) - 1; // JavaScript months are 0-indexed + const year = Number.parseInt(match[4], 10); - const sheetDate = new Date(year, month, day) + const sheetDate = new Date(year, month, day); - return sheetDate >= today + return sheetDate >= today; }) - const final = [] + const final = []; let finalIndex = 0 for (const key of upcomingSheets) { - const currentSheet = workbook.getWorksheet(key) - final.push({}) + const currentSheet = workbook.getWorksheet(key); + final.push({}); - const regex = /[AEC][0-4][a-c]?\s*\/.*/s - const prefixRegex = /[AEC][0-4][a-c]?/ - const classes = [] - const matchingKeys = [] + const regex = /[AEC][0-4][a-c]?\s*\/.*/s; + const prefixRegex = /[AEC][0-4][a-c]?/; + const classes = []; + const matchingKeys = []; - currentSheet.eachRow((row, rowNumber) => { - row.eachCell((cell, colNumber) => { - const cellAddress = cell.address - const value = cell.value + currentSheet.eachRow((row) => { + row.eachCell((cell) => { + const cellAddress = cell.address; + const value = cell.value; if (value && typeof value === "string") { - const testResult = regex.test(value) + const testResult = regex.test(value); if (testResult) { - const prefixMatch = value.match(prefixRegex) + const prefixMatch = value.match(prefixRegex); if (prefixMatch) { - const prefix = prefixMatch[0] - classes.push(prefix) + const prefix = prefixMatch[0]; + classes.push(prefix); } - matchingKeys.push(cellAddress) + matchingKeys.push(cellAddress); } } }) }) function letterToNumber(letter) { - return letter.toLowerCase().charCodeAt(0) - "a".charCodeAt(0) + return letter.toLowerCase().charCodeAt(0) - "a".charCodeAt(0); } // For each class - let classI = 0 + let classI = 0; for (const matchingKey of matchingKeys) { - const matchingCell = currentSheet.getCell(matchingKey) - const rowNumber = matchingCell.row - const allKeys = [] + const matchingCell = currentSheet.getCell(matchingKey); + const rowNumber = matchingCell.row; + const allKeys = []; // Get all cells in the same row - const row = currentSheet.getRow(rowNumber) - row.eachCell((cell, colNumber) => { + const row = currentSheet.getRow(rowNumber); + row.eachCell((cell) => { if (cell.address !== matchingKey) { - allKeys.push(cell.address) + allKeys.push(cell.address); } }) - let final2 = [] + let final2 = []; for (const key of allKeys) { - const cell = currentSheet.getCell(key) - const parsedKey = letterToNumber(key.replace(/[0-9]/gi, "")) - let d = true + const cell = currentSheet.getCell(key); + const parsedKey = letterToNumber(key.replace(/[0-9]/gi, "")); + let d = true; try { - const regex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/ - const cellText = cell.text || "" + const regex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/; + const cellText = cell.text || ""; if (regex.test(cellText.trim()) || cellText.trim().length == 0 || cell.fill?.fgColor === undefined) { - d = false + d = false; } } catch {} @@ -102,101 +102,101 @@ export default async function parseThisShit(downloadedFilePath) { if (cell.fill?.fgColor?.argb == "FFFFFF00") { text += "\n(bude upřesněno)"; } - final2[parsedKey] = text || "" + final2[parsedKey] = text || ""; } else { - final2[parsedKey] = null + final2[parsedKey] = null; } } - final2 = Array.from(final2, (item) => (item === undefined ? null : item)) + final2 = Array.from(final2, (item) => (item === undefined ? null : item)); while (final2.length < 10) { - final2.push(null) + final2.push(null); } - final[finalIndex][classes[classI]] = final2.slice(1, 11) + final[finalIndex][classes[classI]] = final2.slice(1, 11); - classI++ + classI++; } // ABSENCE - final[finalIndex]["ABSENCE"] = [] - let absenceKey = null + final[finalIndex]["ABSENCE"] = []; + let absenceKey = null; - currentSheet.eachRow((row, rowNumber) => { - row.eachCell((cell, colNumber) => { - const value = (typeof cell.value === "string" ? cell.value : "").trim().toLowerCase() + currentSheet.eachRow((row) => { + row.eachCell((cell) => { + const value = (typeof cell.value === "string" ? cell.value : "").trim().toLowerCase(); if (value === "absence") { - absenceKey = cell.address + absenceKey = cell.address; } }) - }) + }); if (absenceKey) { - const teacherMap = await parseTeachers() - const absenceCell = currentSheet.getCell(absenceKey) - const rowNumber = absenceCell.row - const allAbsenceKeys = [] + const teacherMap = await parseTeachers(); + const absenceCell = currentSheet.getCell(absenceKey); + const rowNumber = absenceCell.row; + const allAbsenceKeys = []; // Get all cells in the same row as absence - const row = currentSheet.getRow(rowNumber) - row.eachCell((cell, colNumber) => { + const row = currentSheet.getRow(rowNumber); + row.eachCell((cell) => { if (cell.address !== absenceKey) { - allAbsenceKeys.push(cell.address) + allAbsenceKeys.push(cell.address); } }) - let i = 0 + let i = 0; for (const absenceKeyCur of allAbsenceKeys) { if (i >= 10) { - break // stop once 10 items are added + break; // stop once 10 items are added } - i++ + i++; - const cell = currentSheet.getCell(absenceKeyCur) - const value = (cell.value || "").toString().trim() + const cell = currentSheet.getCell(absenceKeyCur); + const value = (cell.value || "").toString().trim(); if (value.length === 0) { continue } - const data = parseAbsence(value, teacherMap) - final[finalIndex]["ABSENCE"].push(data) + const data = parseAbsence(value, teacherMap); + final[finalIndex]["ABSENCE"].push(data); } } - finalIndex++ + finalIndex++; } const data = { schedule: final, props: upcomingSheets.map((str) => { - const dateMatch = str.match(/(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4})/) + const dateMatch = str.match(/(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4})/); - let date = null + let date = null; if (dateMatch) { - const day = Number.parseInt(dateMatch[1], 10) - const month = Number.parseInt(dateMatch[2], 10) - const year = Number.parseInt(dateMatch[3], 10) + const day = Number.parseInt(dateMatch[1], 10); + const month = Number.parseInt(dateMatch[2], 10); + const year = Number.parseInt(dateMatch[3], 10); - date = new Date(year, month - 1, day) + date = new Date(year, month - 1, day); } const isPriprava = str .toLowerCase() .normalize("NFD") .replace(/[\u0300-\u036f]/g, "") - .includes("priprava") + .includes("priprava"); return { date: date ? `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, "0")}-${String(date.getDate()).padStart(2, "0")}` : null, priprava: isPriprava, - } + }; }), } - fs.writeFileSync("db/current.json", JSON.stringify(data)) + fs.writeFileSync("db/current.json", JSON.stringify(data)); } // parseThisShit("downloads/table.xlsx")