1
0
Files
jecnarozvrh/scrape/parse.js
jzitnik-dev b600fe2669
All checks were successful
Remote Deploy / deploy (push) Successful in 4s
fix: Again
2025-09-07 20:18:43 +02:00

203 lines
5.6 KiB
JavaScript

import ExcelJS from "exceljs"
import fs from "fs"
import parseAbsence from "./utils/parseAbsence.js"
import parseTeachers from "./utils/parseTeachers.js"
// Sheet names start with a Czech weekday followed by "D. M. YYYY"; groups 2-4 are day, month, year.
const SHEET_DATE_REGEX = /^(pondělí|úterý|středa|čtvrtek|pátek)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i
// A class-label cell contains a class code followed by a slash, e.g. "C2a / ...".
const CLASS_LABEL_REGEX = /[AEC][0-4][a-c]?\s*\/.*/s
const CLASS_CODE_REGEX = /[AEC][0-4][a-c]?/
// Cells of the form "úklid [number] Xy" are dropped from the schedule (presumably cleaning-duty notes).
const CLEANING_DUTY_REGEX = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/
// Cells filled with this colour get "(bude upřesněno)" appended to their text.
const TENTATIVE_FILL_ARGB = "FFFFFF00"
// Lessons are read from the ten columns right after column A (B..K).
const LESSONS_PER_DAY = 10
// At most this many cells of the absence row are inspected (empty ones count too).
const MAX_ABSENCE_CELLS = 10

/** Returns today's date at local midnight, so sheets dated today still count as upcoming. */
function startOfToday() {
  const now = new Date()
  return new Date(now.getFullYear(), now.getMonth(), now.getDate())
}

/**
 * Extracts the date encoded in a worksheet name.
 * @param {string} sheetName
 * @returns {Date|null} Local-midnight date, or null when the name is not a dated sheet.
 */
function parseSheetDate(sheetName) {
  const match = sheetName.match(SHEET_DATE_REGEX)
  if (!match) return null
  const day = Number.parseInt(match[2], 10)
  const monthIndex = Number.parseInt(match[3], 10) - 1 // JavaScript months are 0-indexed
  const year = Number.parseInt(match[4], 10)
  return new Date(year, monthIndex, day)
}

/** Formats a date as "YYYY-MM-DD" using its local calendar fields. */
function formatLocalDate(date) {
  const month = String(date.getMonth() + 1).padStart(2, "0")
  const day = String(date.getDate()).padStart(2, "0")
  return `${date.getFullYear()}-${month}-${day}`
}

/** Tells whether a sheet name contains "priprava", ignoring case and diacritics. */
function mentionsPriprava(sheetName) {
  const withoutDiacritics = sheetName
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
  return withoutDiacritics.includes("priprava")
}

/**
 * Converts a cell address such as "B7" or "AA3" to a zero-based column index
 * (A -> 0, B -> 1, ..., Z -> 25, AA -> 26).
 */
function columnIndexFromAddress(address) {
  let columnNumber = 0
  for (const letter of address.replace(/[^A-Za-z]/g, "").toUpperCase()) {
    columnNumber = columnNumber * 26 + (letter.charCodeAt(0) - 64)
  }
  return columnNumber - 1
}

/**
 * Finds every string cell that labels a class row.
 * @returns {{className: string, address: string, row: object}[]} Labels in sheet iteration order.
 */
function findClassLabels(sheet) {
  const labels = []
  sheet.eachRow((row) => {
    row.eachCell((cell) => {
      const value = cell.value
      if (typeof value !== "string" || !CLASS_LABEL_REGEX.test(value)) return
      const codeMatch = value.match(CLASS_CODE_REGEX)
      if (codeMatch) {
        labels.push({ className: codeMatch[0], address: cell.address, row })
      }
    })
  })
  return labels
}

/**
 * Turns one schedule cell into its lesson text.
 * @returns {string|null} The cell text (with a note appended for the tentative fill colour),
 *   or null when the cell is blank, has no foreground fill colour, or matches the "úklid" pattern.
 */
function readLessonCell(cell) {
  // Coerce explicitly so that a non-string text value cannot break trim().
  const text = String(cell.text ?? "")
  const trimmed = text.trim()
  const fillColor = cell.fill?.fgColor
  if (trimmed.length === 0 || fillColor === undefined || CLEANING_DUTY_REGEX.test(trimmed)) {
    return null
  }
  return fillColor?.argb === TENTATIVE_FILL_ARGB ? `${text}\n(bude upřesněno)` : text
}

/**
 * Reads the lessons of the row that holds the given class label.
 * @returns {(string|null)[]} Exactly LESSONS_PER_DAY entries taken from columns B onwards.
 */
function readClassLessons({ address, row }) {
  const lessonsByColumn = []
  row.eachCell((cell) => {
    if (cell.address === address) return
    lessonsByColumn[columnIndexFromAddress(cell.address)] = readLessonCell(cell)
  })
  // Array.from turns the holes left by skipped columns into explicit nulls.
  const lessons = Array.from(lessonsByColumn, (item) => (item === undefined ? null : item))
  // Index 0 (column A) is sliced off below, so pad to one more than the lesson count;
  // otherwise short rows would end up with only nine entries.
  while (lessons.length < LESSONS_PER_DAY + 1) {
    lessons.push(null)
  }
  return lessons.slice(1, LESSONS_PER_DAY + 1)
}

/**
 * Locates the cell whose trimmed, lower-cased string value is "absence".
 * When several cells match, the last one visited wins.
 * @returns {{address: string, row: object}|null}
 */
function findAbsenceCell(sheet) {
  let found = null
  sheet.eachRow((row) => {
    row.eachCell((cell) => {
      const value = typeof cell.value === "string" ? cell.value.trim().toLowerCase() : ""
      if (value === "absence") {
        found = { address: cell.address, row }
      }
    })
  })
  return found
}

/**
 * Parses the non-empty cells that share a row with the "absence" marker.
 * @returns {Array} One parseAbsence() result per non-empty inspected cell.
 */
function readAbsences({ address, row }, teacherMap) {
  const absences = []
  let inspected = 0
  row.eachCell((cell) => {
    if (cell.address === address || inspected >= MAX_ABSENCE_CELLS) return
    inspected++
    const value = (cell.value || "").toString().trim()
    if (value.length === 0) return
    absences.push(parseAbsence(value, teacherMap))
  })
  return absences
}

/**
 * Parses the downloaded substitution workbook and writes the result to "db/current.json".
 *
 * Only worksheets whose name carries a date that is today or later are processed. For each
 * of them the output holds one object mapping class codes to their lesson entries plus an
 * "ABSENCE" list; `props` holds the matching date ("YYYY-MM-DD") and "priprava" flag.
 *
 * @param {string} downloadedFilePath Path of the .xlsx file to read.
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 */
export default async function parseThisShit(downloadedFilePath) {
  const workbook = new ExcelJS.Workbook()
  await workbook.xlsx.readFile(downloadedFilePath)

  const today = startOfToday()
  const upcoming = workbook.worksheets
    .map((sheet) => ({ sheet, name: sheet.name, date: parseSheetDate(sheet.name) }))
    .filter(({ date }) => date !== null && date >= today)

  // Fetched lazily and at most once per run; sheets without an absence row never trigger it.
  // NOTE(review): assumes parseAbsence does not mutate the map — confirm.
  let teacherMapPromise = null

  const schedule = []
  for (const { sheet } of upcoming) {
    const day = {}
    for (const label of findClassLabels(sheet)) {
      day[label.className] = readClassLessons(label)
    }

    day.ABSENCE = []
    const absenceCell = findAbsenceCell(sheet)
    if (absenceCell) {
      if (teacherMapPromise === null) {
        teacherMapPromise = parseTeachers()
      }
      const teacherMap = await teacherMapPromise
      day.ABSENCE = readAbsences(absenceCell, teacherMap)
    }

    schedule.push(day)
  }

  const data = {
    schedule,
    props: upcoming.map(({ name, date }) => ({
      date: formatLocalDate(date),
      priprava: mentionsPriprava(name),
    })),
  }
  fs.writeFileSync("db/current.json", JSON.stringify(data))
}
// parseThisShit("downloads/table.xlsx")