Ti kokoti nyní místo "čtvrtek 23.10.2025 příprava" dali jen "čt 23.10.2025 příprava", což samozřejmě zase rozbilo parser. Parser proto nyní v názvu listu přijímá i zkrácenou formu dne v týdnu (např. "čt" místo "čtvrtek").
This commit is contained in:
164
scrape/parse.js
164
scrape/parse.js
@@ -4,96 +4,96 @@ import parseAbsence from "./utils/parseAbsence.js"
|
|||||||
import parseTeachers from "./utils/parseTeachers.js"
|
import parseTeachers from "./utils/parseTeachers.js"
|
||||||
|
|
||||||
export default async function parseThisShit(downloadedFilePath) {
|
export default async function parseThisShit(downloadedFilePath) {
|
||||||
const workbook = new ExcelJS.Workbook()
|
const workbook = new ExcelJS.Workbook();
|
||||||
await workbook.xlsx.readFile(downloadedFilePath)
|
await workbook.xlsx.readFile(downloadedFilePath);
|
||||||
|
|
||||||
const sheetNames = workbook.worksheets.map((sheet) => sheet.name)
|
const sheetNames = workbook.worksheets.map((sheet) => sheet.name);
|
||||||
|
|
||||||
const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i
|
const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek|po|út|ut|st|čt|ct|pa|pá)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i;
|
||||||
|
|
||||||
// Get today's date for comparison
|
// Get today's date for comparison
|
||||||
function getCurrentDateObject() {
|
function getCurrentDateObject() {
|
||||||
const now = new Date()
|
const now = new Date();
|
||||||
return new Date(now.getFullYear(), now.getMonth(), now.getDate())
|
return new Date(now.getFullYear(), now.getMonth(), now.getDate());
|
||||||
}
|
}
|
||||||
|
|
||||||
const today = getCurrentDateObject()
|
const today = getCurrentDateObject();
|
||||||
|
|
||||||
const upcomingSheets = sheetNames.filter((name) => {
|
const upcomingSheets = sheetNames.filter((name) => {
|
||||||
const match = name.match(dateRegex)
|
const match = name.match(dateRegex);
|
||||||
if (!match) return false
|
if (!match) return false;
|
||||||
|
|
||||||
const day = Number.parseInt(match[2], 10)
|
const day = Number.parseInt(match[2], 10);
|
||||||
const month = Number.parseInt(match[3], 10) - 1 // JavaScript months are 0-indexed
|
const month = Number.parseInt(match[3], 10) - 1; // JavaScript months are 0-indexed
|
||||||
const year = Number.parseInt(match[4], 10)
|
const year = Number.parseInt(match[4], 10);
|
||||||
|
|
||||||
const sheetDate = new Date(year, month, day)
|
const sheetDate = new Date(year, month, day);
|
||||||
|
|
||||||
return sheetDate >= today
|
return sheetDate >= today;
|
||||||
})
|
})
|
||||||
|
|
||||||
const final = []
|
const final = [];
|
||||||
|
|
||||||
let finalIndex = 0
|
let finalIndex = 0
|
||||||
for (const key of upcomingSheets) {
|
for (const key of upcomingSheets) {
|
||||||
const currentSheet = workbook.getWorksheet(key)
|
const currentSheet = workbook.getWorksheet(key);
|
||||||
final.push({})
|
final.push({});
|
||||||
|
|
||||||
const regex = /[AEC][0-4][a-c]?\s*\/.*/s
|
const regex = /[AEC][0-4][a-c]?\s*\/.*/s;
|
||||||
const prefixRegex = /[AEC][0-4][a-c]?/
|
const prefixRegex = /[AEC][0-4][a-c]?/;
|
||||||
const classes = []
|
const classes = [];
|
||||||
const matchingKeys = []
|
const matchingKeys = [];
|
||||||
|
|
||||||
currentSheet.eachRow((row, rowNumber) => {
|
currentSheet.eachRow((row) => {
|
||||||
row.eachCell((cell, colNumber) => {
|
row.eachCell((cell) => {
|
||||||
const cellAddress = cell.address
|
const cellAddress = cell.address;
|
||||||
const value = cell.value
|
const value = cell.value;
|
||||||
|
|
||||||
if (value && typeof value === "string") {
|
if (value && typeof value === "string") {
|
||||||
const testResult = regex.test(value)
|
const testResult = regex.test(value);
|
||||||
if (testResult) {
|
if (testResult) {
|
||||||
const prefixMatch = value.match(prefixRegex)
|
const prefixMatch = value.match(prefixRegex);
|
||||||
if (prefixMatch) {
|
if (prefixMatch) {
|
||||||
const prefix = prefixMatch[0]
|
const prefix = prefixMatch[0];
|
||||||
classes.push(prefix)
|
classes.push(prefix);
|
||||||
}
|
}
|
||||||
matchingKeys.push(cellAddress)
|
matchingKeys.push(cellAddress);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
function letterToNumber(letter) {
|
function letterToNumber(letter) {
|
||||||
return letter.toLowerCase().charCodeAt(0) - "a".charCodeAt(0)
|
return letter.toLowerCase().charCodeAt(0) - "a".charCodeAt(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// For each class
|
// For each class
|
||||||
let classI = 0
|
let classI = 0;
|
||||||
for (const matchingKey of matchingKeys) {
|
for (const matchingKey of matchingKeys) {
|
||||||
const matchingCell = currentSheet.getCell(matchingKey)
|
const matchingCell = currentSheet.getCell(matchingKey);
|
||||||
const rowNumber = matchingCell.row
|
const rowNumber = matchingCell.row;
|
||||||
const allKeys = []
|
const allKeys = [];
|
||||||
|
|
||||||
// Get all cells in the same row
|
// Get all cells in the same row
|
||||||
const row = currentSheet.getRow(rowNumber)
|
const row = currentSheet.getRow(rowNumber);
|
||||||
row.eachCell((cell, colNumber) => {
|
row.eachCell((cell) => {
|
||||||
if (cell.address !== matchingKey) {
|
if (cell.address !== matchingKey) {
|
||||||
allKeys.push(cell.address)
|
allKeys.push(cell.address);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
let final2 = []
|
let final2 = [];
|
||||||
|
|
||||||
for (const key of allKeys) {
|
for (const key of allKeys) {
|
||||||
const cell = currentSheet.getCell(key)
|
const cell = currentSheet.getCell(key);
|
||||||
const parsedKey = letterToNumber(key.replace(/[0-9]/gi, ""))
|
const parsedKey = letterToNumber(key.replace(/[0-9]/gi, ""));
|
||||||
let d = true
|
let d = true;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const regex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/
|
const regex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/;
|
||||||
const cellText = cell.text || ""
|
const cellText = cell.text || "";
|
||||||
if (regex.test(cellText.trim()) || cellText.trim().length == 0 || cell.fill?.fgColor === undefined) {
|
if (regex.test(cellText.trim()) || cellText.trim().length == 0 || cell.fill?.fgColor === undefined) {
|
||||||
d = false
|
d = false;
|
||||||
}
|
}
|
||||||
} catch {}
|
} catch {}
|
||||||
|
|
||||||
@@ -102,101 +102,101 @@ export default async function parseThisShit(downloadedFilePath) {
|
|||||||
if (cell.fill?.fgColor?.argb == "FFFFFF00") {
|
if (cell.fill?.fgColor?.argb == "FFFFFF00") {
|
||||||
text += "\n(bude upřesněno)";
|
text += "\n(bude upřesněno)";
|
||||||
}
|
}
|
||||||
final2[parsedKey] = text || ""
|
final2[parsedKey] = text || "";
|
||||||
} else {
|
} else {
|
||||||
final2[parsedKey] = null
|
final2[parsedKey] = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final2 = Array.from(final2, (item) => (item === undefined ? null : item))
|
final2 = Array.from(final2, (item) => (item === undefined ? null : item));
|
||||||
while (final2.length < 10) {
|
while (final2.length < 10) {
|
||||||
final2.push(null)
|
final2.push(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
final[finalIndex][classes[classI]] = final2.slice(1, 11)
|
final[finalIndex][classes[classI]] = final2.slice(1, 11);
|
||||||
|
|
||||||
classI++
|
classI++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ABSENCE
|
// ABSENCE
|
||||||
final[finalIndex]["ABSENCE"] = []
|
final[finalIndex]["ABSENCE"] = [];
|
||||||
let absenceKey = null
|
let absenceKey = null;
|
||||||
|
|
||||||
currentSheet.eachRow((row, rowNumber) => {
|
currentSheet.eachRow((row) => {
|
||||||
row.eachCell((cell, colNumber) => {
|
row.eachCell((cell) => {
|
||||||
const value = (typeof cell.value === "string" ? cell.value : "").trim().toLowerCase()
|
const value = (typeof cell.value === "string" ? cell.value : "").trim().toLowerCase();
|
||||||
if (value === "absence") {
|
if (value === "absence") {
|
||||||
absenceKey = cell.address
|
absenceKey = cell.address;
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
})
|
});
|
||||||
|
|
||||||
if (absenceKey) {
|
if (absenceKey) {
|
||||||
const teacherMap = await parseTeachers()
|
const teacherMap = await parseTeachers();
|
||||||
const absenceCell = currentSheet.getCell(absenceKey)
|
const absenceCell = currentSheet.getCell(absenceKey);
|
||||||
const rowNumber = absenceCell.row
|
const rowNumber = absenceCell.row;
|
||||||
const allAbsenceKeys = []
|
const allAbsenceKeys = [];
|
||||||
|
|
||||||
// Get all cells in the same row as absence
|
// Get all cells in the same row as absence
|
||||||
const row = currentSheet.getRow(rowNumber)
|
const row = currentSheet.getRow(rowNumber);
|
||||||
row.eachCell((cell, colNumber) => {
|
row.eachCell((cell) => {
|
||||||
if (cell.address !== absenceKey) {
|
if (cell.address !== absenceKey) {
|
||||||
allAbsenceKeys.push(cell.address)
|
allAbsenceKeys.push(cell.address);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
let i = 0
|
let i = 0;
|
||||||
for (const absenceKeyCur of allAbsenceKeys) {
|
for (const absenceKeyCur of allAbsenceKeys) {
|
||||||
if (i >= 10) {
|
if (i >= 10) {
|
||||||
break // stop once 10 items are added
|
break; // stop once 10 items are added
|
||||||
}
|
}
|
||||||
i++
|
i++;
|
||||||
|
|
||||||
const cell = currentSheet.getCell(absenceKeyCur)
|
const cell = currentSheet.getCell(absenceKeyCur);
|
||||||
const value = (cell.value || "").toString().trim()
|
const value = (cell.value || "").toString().trim();
|
||||||
if (value.length === 0) {
|
if (value.length === 0) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = parseAbsence(value, teacherMap)
|
const data = parseAbsence(value, teacherMap);
|
||||||
final[finalIndex]["ABSENCE"].push(data)
|
final[finalIndex]["ABSENCE"].push(data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
finalIndex++
|
finalIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
schedule: final,
|
schedule: final,
|
||||||
props: upcomingSheets.map((str) => {
|
props: upcomingSheets.map((str) => {
|
||||||
const dateMatch = str.match(/(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4})/)
|
const dateMatch = str.match(/(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4})/);
|
||||||
|
|
||||||
let date = null
|
let date = null;
|
||||||
|
|
||||||
if (dateMatch) {
|
if (dateMatch) {
|
||||||
const day = Number.parseInt(dateMatch[1], 10)
|
const day = Number.parseInt(dateMatch[1], 10);
|
||||||
const month = Number.parseInt(dateMatch[2], 10)
|
const month = Number.parseInt(dateMatch[2], 10);
|
||||||
const year = Number.parseInt(dateMatch[3], 10)
|
const year = Number.parseInt(dateMatch[3], 10);
|
||||||
|
|
||||||
date = new Date(year, month - 1, day)
|
date = new Date(year, month - 1, day);
|
||||||
}
|
}
|
||||||
|
|
||||||
const isPriprava = str
|
const isPriprava = str
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
.normalize("NFD")
|
.normalize("NFD")
|
||||||
.replace(/[\u0300-\u036f]/g, "")
|
.replace(/[\u0300-\u036f]/g, "")
|
||||||
.includes("priprava")
|
.includes("priprava");
|
||||||
|
|
||||||
return {
|
return {
|
||||||
date: date
|
date: date
|
||||||
? `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, "0")}-${String(date.getDate()).padStart(2, "0")}`
|
? `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, "0")}-${String(date.getDate()).padStart(2, "0")}`
|
||||||
: null,
|
: null,
|
||||||
priprava: isPriprava,
|
priprava: isPriprava,
|
||||||
}
|
};
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
fs.writeFileSync("db/current.json", JSON.stringify(data))
|
fs.writeFileSync("db/current.json", JSON.stringify(data));
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseThisShit("downloads/table.xlsx")
|
// parseThisShit("downloads/table.xlsx")
|
||||||
|
|||||||
Reference in New Issue
Block a user