feat: Use better lib for parsing and minor changes
All checks were successful
Remote Deploy / deploy (push) Successful in 6s
All checks were successful
Remote Deploy / deploy (push) Successful in 6s
This commit is contained in:
268
scrape/parse.js
268
scrape/parse.js
@@ -1,152 +1,250 @@
|
||||
import XLSX from "xlsx";
|
||||
import fs from "fs";
|
||||
import parseAbsence from "./utils/parseAbsence.js";
|
||||
import parseTeachers from "./utils/parseTeachers.js";
|
||||
import ExcelJS from "exceljs"
|
||||
import fs from "fs"
|
||||
import parseAbsence from "./utils/parseAbsence.js"
|
||||
import parseTeachers from "./utils/parseTeachers.js"
|
||||
|
||||
/**
 * Parse the downloaded workbook and write the upcoming days to `db/current.json`.
 *
 * Only worksheets whose name starts with a Czech weekday followed by a
 * `D. M. YYYY` date that is today or later are processed. For each such sheet:
 *   - every string cell matching `<class> / ...` (e.g. `A1a / ...`) marks a class row;
 *     the other cells visited by `row.eachCell` on that row are mapped by column to
 *     lesson entries (text or `null`),
 *   - the cells sharing a row with the cell whose text is "absence" are passed one by
 *     one to `parseAbsence`.
 *
 * The written JSON has the shape `{ schedule: [...], props: [{ date, priprava }] }`,
 * with one `schedule` entry and one `props` entry per processed sheet, in sheet order.
 *
 * @param {string} downloadedFilePath Path of the .xlsx file to read.
 * @returns {Promise<void>} Resolves once `db/current.json` has been written.
 */
export default async function parseThisShit(downloadedFilePath) {
  const workbook = new ExcelJS.Workbook()
  await workbook.xlsx.readFile(downloadedFilePath)

  const sheetNames = workbook.worksheets.map((sheet) => sheet.name)

  const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i

  // Local midnight of the current day, so a sheet dated today still counts as upcoming.
  const now = new Date()
  const today = new Date(now.getFullYear(), now.getMonth(), now.getDate())

  const upcomingSheets = sheetNames.filter((name) => {
    const match = name.match(dateRegex)
    if (!match) return false

    const day = Number.parseInt(match[2], 10)
    const month = Number.parseInt(match[3], 10) - 1 // JavaScript months are 0-indexed
    const year = Number.parseInt(match[4], 10)

    return new Date(year, month, day) >= today
  })

  const classRowRegex = /[AEC][0-4][a-c]?\s*\/.*/s
  const classNameRegex = /[AEC][0-4][a-c]?/

  const final = []
  // Loaded on first use and shared by all sheets instead of being re-read for each one.
  let teacherMap

  for (const sheetName of upcomingSheets) {
    const currentSheet = workbook.getWorksheet(sheetName)
    const daySchedule = {}
    final.push(daySchedule)

    // Single pass over the sheet: collect the class header cells and locate the "absence" cell.
    const classRows = []
    let absenceCell = null
    currentSheet.eachRow((row) => {
      row.eachCell((cell) => {
        const value = cell.value
        if (typeof value !== "string") return

        if (classRowRegex.test(value)) {
          // classRowRegex begins with the same pattern as classNameRegex, so this match cannot be null.
          const className = value.match(classNameRegex)[0]
          classRows.push({ address: cell.address, rowNumber: cell.row, className })
        }

        // When several cells read "absence", the last one visited wins.
        if (value.trim().toLowerCase() === "absence") {
          absenceCell = cell
        }
      })
    })

    // For each class
    for (const { address, rowNumber, className } of classRows) {
      const lessons = []
      currentSheet.getRow(rowNumber).eachCell((cell) => {
        if (cell.address === address) return
        lessons[columnIndexFromAddress(cell.address)] = lessonTextFromCell(cell)
      })

      // Turn the sparse, column-indexed array into a dense one with explicit nulls.
      const dense = Array.from(lessons, (item) => (item === undefined ? null : item))
      while (dense.length < 10) {
        dense.push(null)
      }

      // Index 0 is column A and is dropped; columns B..K are kept.
      // NOTE(review): padding to 10 and slicing from index 1 yields only 9 entries when the
      // last visited cell of the row is in column J or earlier — confirm what consumers expect.
      daySchedule[className] = dense.slice(1, 11)
    }

    // ABSENCE
    daySchedule["ABSENCE"] = []
    if (absenceCell) {
      if (teacherMap === undefined) {
        teacherMap = await parseTeachers()
      }

      const absenceAddress = absenceCell.address
      const rowCells = []
      currentSheet.getRow(absenceCell.row).eachCell((cell) => {
        if (cell.address !== absenceAddress) {
          rowCells.push(cell)
        }
      })

      // Only the first 10 cells of the row are looked at; blank ones among them are skipped.
      for (const cell of rowCells.slice(0, 10)) {
        const value = (cell.value || "").toString().trim()
        if (value.length === 0) continue

        daySchedule["ABSENCE"].push(parseAbsence(value, teacherMap))
      }
    }
  }

  const data = {
    schedule: final,
    props: upcomingSheets.map((str) => {
      const dateMatch = str.match(/(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4})/)

      let date = null
      if (dateMatch) {
        const day = Number.parseInt(dateMatch[1], 10)
        const month = Number.parseInt(dateMatch[2], 10)
        const year = Number.parseInt(dateMatch[3], 10)

        date = new Date(year, month - 1, day)
      }

      // Strip diacritics so accented and unaccented spellings both match "priprava".
      const isPriprava = str
        .toLowerCase()
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "")
        .includes("priprava")

      return {
        date: date
          ? `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, "0")}-${String(date.getDate()).padStart(2, "0")}`
          : null,
        priprava: isPriprava,
      }
    }),
  }

  fs.writeFileSync("db/current.json", JSON.stringify(data))
}

/**
 * Convert the column letters of a cell address ("B12", "AA3") to a zero-based column index.
 * Multi-letter columns are handled, so "AA" maps to 26 instead of colliding with "A".
 *
 * @param {string} address Cell address such as "C7".
 * @returns {number} Zero-based column index (A -> 0, B -> 1, ...).
 */
function columnIndexFromAddress(address) {
  const letters = address.replace(/[^a-z]/gi, "").toUpperCase()
  let index = 0
  for (const letter of letters) {
    index = index * 26 + (letter.charCodeAt(0) - 64) // "A" is char code 65
  }
  return index - 1
}

/**
 * Produce the schedule entry for one cell of a class row.
 *
 * Returns `null` when the cell is blank, when its text has the form
 * `úklid <optional number> <two letters>`, or when it has no foreground fill colour.
 * Cells filled with ARGB `FFFFFF00` get "\n(bude upřesněno)" appended to their text.
 *
 * The suffix is appended to a local string: the cell itself is never written to, and a
 * cell without any fill is treated as "no entry" instead of raising a TypeError.
 *
 * @param {import('exceljs').Cell} cell
 * @returns {string|null}
 */
function lessonTextFromCell(cell) {
  const ignoredEntryRegex = /^úklid\s+(?:\d+\s+)?[A-Za-z]{2}$/
  const text = cell.text || ""
  const trimmed = text.trim()
  const fgColor = cell.fill?.fgColor

  if (trimmed.length === 0 || ignoredEntryRegex.test(trimmed) || fgColor === undefined) {
    return null
  }

  return fgColor?.argb === "FFFFFF00" ? `${text}\n(bude upřesněno)` : text
}
|
||||
|
||||
// parseThisShit("downloads/table.xlsx");
|
||||
// NOTE(review): this top-level call runs whenever the module is evaluated, including when it
// is only imported for its default export, and the returned promise is neither awaited nor
// given a rejection handler. Confirm this is intended and not a leftover debugging call.
parseThisShit("downloads/table.xlsx")
|
||||
|
||||
|
||||
/**
|
||||
* Get cell background color as hex (ARGB or theme + tint)
|
||||
* @param {import('exceljs').Cell} cell
|
||||
* @returns {string|null} Hex color like "#FF0000" or null if no fill
|
||||
*/
|
||||
/**
 * Resolve a cell's foreground fill colour to an uppercase "#RRGGBB" string.
 *
 * An explicit ARGB value takes precedence; otherwise a theme index (plus optional tint)
 * is resolved against a fixed palette. Both paths return uppercase hex so results can be
 * compared directly.
 *
 * @param {import('exceljs').Cell} cell
 * @returns {string|null} Hex color like "#FF0000", or null if the cell has no fill,
 *   no foreground colour, or a colour that is neither ARGB nor theme based.
 */
function getCellHexColor(cell) {
  const color = cell?.fill?.fgColor;
  if (!color) return null;

  if (color.argb) {
    // Direct colour: drop the alpha byte of an 8-digit ARGB value, keep a 6-digit RGB value as is.
    const argb = String(color.argb);
    const rgb = argb.length === 8 ? argb.slice(2) : argb;
    return `#${rgb.toUpperCase()}`;
  }

  if (color.theme === undefined) return null;

  // Palette indexed by theme colour number. These values correspond to the Office 2007-2010
  // default theme; workbooks using another theme will resolve to different colours.
  // NOTE(review): the workbook's own theme is not consulted here.
  const themeColors = [
    "FFFFFF", "000000", "EEECE1", "1F497D", "4F81BD", "C0504D", "9BBB59", "8064A2", "4BACC6", "F79646",
  ];
  const baseHex = themeColors[color.theme] || "000000";
  const tint = color.tint || 0;

  // Negative tint scales each channel towards black, positive tint moves it towards white.
  const channels = [0, 2, 4].map((offset) => {
    const base = Number.parseInt(baseHex.substring(offset, offset + 2), 16);
    const shifted = tint < 0 ? base * (1 + tint) : base + (255 - base) * tint;
    const clamped = Math.round(Math.min(255, Math.max(0, shifted)));
    return clamped.toString(16).padStart(2, "0");
  });

  return `#${channels.join("").toUpperCase()}`;
}
|
||||
|
||||
Reference in New Issue
Block a user