initial commit
This commit is contained in:
119
scrape/parse.js
Normal file
119
scrape/parse.js
Normal file
@@ -0,0 +1,119 @@
|
||||
const XLSX = require('xlsx');
|
||||
const fs = require("fs");
|
||||
|
||||
/**
 * Splits a worksheet key such as "B11" into a column index and a row number.
 *
 * @param {string} address - Worksheet key to parse.
 * @returns {{column: number, row: number} | null} Zero-based column index
 *     (A = 0, Z = 25, AA = 26, ...) and the row number, or null when the key
 *     is not a plain cell address (for example "!ref").
 */
function parseCellAddress(address) {
    const parts = /^([A-Za-z]+)(\d+)$/.exec(address);
    if (!parts) return null;

    // Bijective base-26: every letter counts, so "BA" no longer collides with "B".
    let column = 0;
    for (const letter of parts[1].toUpperCase()) {
        column = column * 26 + (letter.charCodeAt(0) - 64);
    }

    return { column: column - 1, row: Number.parseInt(parts[2], 10) };
}

/**
 * Reads the date out of a sheet name of the form "<Czech weekday> D. M. 20YY...".
 *
 * @param {string} sheetName - Name of a worksheet.
 * @returns {Date | null} Local-midnight date, or null when the name does not
 *     start with a weekday (pondělí..pátek) followed by a date.
 */
function parseSheetDate(sheetName) {
    const match = /^(pondělí|úterý|středa|čtvrtek|pátek)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i.exec(sheetName);
    if (!match) return null;

    const day = Number.parseInt(match[2], 10);
    const month = Number.parseInt(match[3], 10);
    const year = Number.parseInt(match[4], 10);

    // JavaScript months are 0-indexed.
    return new Date(year, month - 1, day);
}

/**
 * Formats a Date as "yyyy-mm-dd" using its local calendar fields.
 *
 * toISOString() is deliberately not used: it converts to UTC first, which
 * moves a local-midnight date to the previous day in zones east of UTC.
 *
 * @param {Date} date - Date to format.
 * @returns {string} The local date as "yyyy-mm-dd".
 */
function formatLocalDate(date) {
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    return `${date.getFullYear()}-${month}-${day}`;
}

/**
 * Builds the per-class lesson lists for one worksheet.
 *
 * A "class cell" is a cell whose raw value (`v`) contains a class prefix
 * followed by a slash (e.g. "A1a/..."). For each class cell, the formatted
 * text (`w`) of the other cells in the same row is collected by column, and
 * columns B..K (indices 1..10) are kept.
 *
 * @param {Object} sheet - Worksheet object keyed by cell address.
 * @returns {Object} Map of class prefix to an array of lesson texts; entries
 *     are null for cells starting with "úklid" (Czech for "cleaning").
 */
function extractSheetSchedule(sheet) {
    const classCellRegex = /[AEC][0-4][a-c]?\/.*/s;
    const classNameRegex = /[AEC][0-4][a-c]?/;

    // Group real cells by row once, instead of rescanning every key per class.
    const cellsByRow = new Map();
    const classCells = [];
    for (const [address, cell] of Object.entries(sheet)) {
        const position = parseCellAddress(address);
        if (position === null || cell == null) continue; // skips "!ref", "!merges", ...

        if (!cellsByRow.has(position.row)) cellsByRow.set(position.row, []);
        cellsByRow.get(position.row).push({ column: position.column, cell });

        if (typeof cell.v === 'string' && classCellRegex.test(cell.v)) {
            // The prefix pattern is contained in classCellRegex, so this match cannot fail.
            classCells.push({ position, className: cell.v.match(classNameRegex)[0] });
        }
    }

    const schedule = {};
    for (const { position, className } of classCells) {
        const lessons = [];
        // Exact row comparison: row 1 must not pick up cells from rows 11, 21, ...
        for (const { column, cell } of cellsByRow.get(position.row)) {
            if (column === position.column) continue; // the class cell itself

            const text = cell.w;
            const isCleaning = typeof text === 'string' && text.startsWith('úklid');
            lessons[column] = isCleaning ? null : text;
        }

        // Keep columns B..K; holes and undefined serialize to null in JSON.
        schedule[className] = lessons.slice(1, 11);
    }

    return schedule;
}

/**
 * Parses the downloaded workbook and writes the upcoming schedule to
 * 'db/current.json'.
 *
 * Only sheets whose name starts with a Czech weekday and a date that is today
 * or later are processed. The written JSON has two parallel arrays:
 * `schedule` (one class-to-lessons object per sheet) and `props` (one
 * `{ date, priprava }` object per sheet, with `date` as local "yyyy-mm-dd").
 *
 * @param {string} downloadedFilePath - Path of the workbook passed to XLSX.readFile.
 * @returns {void}
 */
function parseThisShit(downloadedFilePath) {
    const workbook = XLSX.readFile(downloadedFilePath);

    // Today at local midnight, so a sheet dated today still counts as upcoming.
    const now = new Date();
    const today = new Date(now.getFullYear(), now.getMonth(), now.getDate());

    const upcoming = [];
    for (const name of workbook.SheetNames) {
        const date = parseSheetDate(name);
        if (date !== null && date >= today) {
            upcoming.push({ name, date });
        }
    }

    const data = {
        schedule: upcoming.map(({ name }) => extractSheetSchedule(workbook.Sheets[name] || {})),
        props: upcoming.map(({ name, date }) => ({
            date: formatLocalDate(date),
            // NOTE(review): the pattern is ASCII "priprava"; a sheet named with
            // "příprava" (diacritics) would not match. Confirm the real sheet naming.
            priprava: /priprava/i.test(name),
        })),
    };

    fs.writeFileSync('db/current.json', JSON.stringify(data));
}
|
||||
|
||||
// CommonJS export: the parser function is this module's only export.
module.exports = parseThisShit;
|
||||
Reference in New Issue
Block a user