1
0

initial commit

This commit is contained in:
2025-08-07 00:17:31 +02:00
commit 47a7c73316
11 changed files with 2676 additions and 0 deletions

1
.dockerignore Normal file
View File

@@ -0,0 +1 @@
volume/

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
node_modules
volume/browser
db
downloads

20
Dockerfile Normal file
View File

@@ -0,0 +1,20 @@
# Container image for the app; the default command below runs `npm start`.
# NOTE(review): the project depends on puppeteer — confirm that this base image
# provides the shared libraries Chromium needs, otherwise launching the browser
# inside the container may fail.
# Use official Node.js image as base
FROM node:18
# Create app directory
WORKDIR /usr/src/app
# Copy package.json and package-lock.json (if available) before the rest of the
# source, so the dependency-install step below only sees the manifests
COPY package*.json ./
# Install dependencies
RUN npm install
# Copy app source code
COPY . .
# Port 3000 is the port server.js listens on
EXPOSE 3000
# Start the app
CMD ["npm", "start"]

22
cron-runner.js Normal file
View File

@@ -0,0 +1,22 @@
const cron = require('node-cron');
const { exec } = require('child_process');
// True while a scraper child process is alive. Guards against overlapping runs
// (e.g. the start-up run colliding with a cron tick), which would start two
// browsers on the same profile directory.
let isScraperRunning = false;
// Kill a stuck scraper before the next 10-minute tick is due, so one hung run
// cannot block every later run.
const SCRAPER_TIMEOUT_MS = 9 * 60 * 1000;

/**
 * Run `node scrape/scraper.js` as a child process and log its outcome.
 *
 * Does nothing (apart from logging) when the previous run has not finished.
 * The captured stdout is logged even when the scraper fails, so the progress
 * messages leading up to a failure are not lost.
 *
 * @returns {void}
 */
function runScraper() {
  if (isScraperRunning) {
    console.log('Previous scraper run still in progress, skipping.');
    return;
  }
  isScraperRunning = true;
  console.log('Running scraper...');
  exec('node scrape/scraper.js', { timeout: SCRAPER_TIMEOUT_MS }, (error, stdout, stderr) => {
    isScraperRunning = false;
    if (error) {
      // For a failed command, exec's error message already contains stderr.
      console.error(`Scraper error: ${error.message}`);
    } else if (stderr) {
      console.error(`Scraper stderr: ${stderr}`);
    }
    if (stdout) console.log(`Scraper output:\n${stdout}`);
  });
}
// Cron expression for "every 10 minutes"
const EVERY_TEN_MINUTES = '*/10 * * * *';
// Do one run at start-up instead of waiting for the first scheduled tick
runScraper();
// Then repeat on the schedule
cron.schedule(EVERY_TEN_MINUTES, runScraper);
console.log('Cron scheduler started. Scraper will run every 10 minutes.');

12
docker-compose.yml Normal file
View File

@@ -0,0 +1,12 @@
version: "3.9"
services:
  app:
    build: .
    ports:
      - "3000:3000"
    environment:
      NODE_ENV: development
    volumes:
      # The container-side target of a bind mount must be an absolute path;
      # /usr/src/app is the WORKDIR set in the Dockerfile.
      - ./volume:/usr/src/app/volume
    command: npm start

2312
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

22
package.json Normal file
View File

@@ -0,0 +1,22 @@
{
"name": "tablescraper",
"version": "1.0.0",
"description": "",
"license": "ISC",
"author": "Jakub Žitník",
"type": "commonjs",
"main": "server.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "concurrently \"node server.js\" \"node cron-runner.js\""
},
"dependencies": {
"axios": "^1.9.0",
"concurrently": "^9.2.0",
"express": "^5.1.0",
"node-cron": "^4.2.1",
"openai": "^5.0.2",
"puppeteer": "^24.10.0",
"xlsx": "^0.18.5"
}
}

119
scrape/parse.js Normal file
View File

@@ -0,0 +1,119 @@
const XLSX = require('xlsx');
const fs = require("fs");
/**
 * Parse the downloaded workbook and write the schedule of today's and all
 * future day-sheets to `db/current.json`.
 *
 * A sheet counts as a day-sheet when its name starts with a Czech weekday
 * followed by a `d. m. yyyy` date. Inside a sheet, every cell whose text
 * contains a class label such as `A1a/...` marks a class row; the cells of that
 * same row in columns B..K become the class's ten schedule slots. Cells whose
 * text starts with "úklid" are stored as `null`.
 *
 * Output shape: `{ schedule: [{ [classLabel]: slots[] }], props: [{ date, priprava }] }`,
 * with both arrays in workbook sheet order.
 *
 * @param {string} downloadedFilePath - Path of the .xlsx file to read.
 * @returns {void}
 */
function parseThisShit(downloadedFilePath) {
  const workbook = XLSX.readFile(downloadedFilePath);

  const dateRegex = /^(pondělí|úterý|středa|čtvrtek|pátek)\s+(\d{1,2})\.\s*(\d{1,2})\.\s*(20\d{2})/i;
  // The capture group is the class label that precedes the slash, so the label
  // always comes from the same spot that made the cell match.
  const classHeaderRegex = /([AEC][0-4][a-c]?)\/.*/s;
  // Splits a cell address such as "AB12" into column letters and row number.
  const cellAddressRegex = /^([A-Z]+)(\d+)$/;

  // Local midnight of the current day, used to drop sheets for past days.
  const now = new Date();
  const today = new Date(now.getFullYear(), now.getMonth(), now.getDate());

  // Zero-based column index for column letters of any length (A -> 0, Z -> 25, AA -> 26).
  function columnToIndex(letters) {
    let index = 0;
    for (const letter of letters) {
      index = index * 26 + (letter.charCodeAt(0) - 64);
    }
    return index - 1;
  }

  const upcomingSheets = [];
  for (const name of workbook.SheetNames) {
    const match = name.match(dateRegex);
    if (!match) continue;
    const day = parseInt(match[2], 10);
    const month = parseInt(match[3], 10) - 1; // JavaScript months are 0-indexed
    const year = parseInt(match[4], 10);
    if (new Date(year, month, day) < today) continue;
    upcomingSheets.push({
      name,
      // Build the ISO date in UTC: toISOString() on a local-midnight Date
      // yields the previous day in timezones ahead of UTC.
      isoDate: new Date(Date.UTC(year, month, day)).toISOString().slice(0, 10),
    });
  }

  const schedule = upcomingSheets.map(({ name }) => {
    const sheet = workbook.Sheets[name];
    const sheetKeys = Object.keys(sheet);
    const daySchedule = {};

    for (const headerKey of sheetKeys) {
      // Non-cell entries (keys starting with "!") have no string `v` and are skipped here.
      const headerCell = sheet[headerKey];
      const headerValue = headerCell ? headerCell.v : undefined;
      if (typeof headerValue !== 'string') continue;
      const headerMatch = headerValue.match(classHeaderRegex);
      const headerAddress = headerKey.match(cellAddressRegex);
      if (!headerMatch || !headerAddress) continue;

      const row = headerAddress[2];
      const slots = [];
      for (const key of sheetKeys) {
        if (key === headerKey) continue;
        const address = key.match(cellAddressRegex);
        // Compare the whole row number; a suffix check would also pull in
        // rows 15, 25, ... for a header in row 5.
        if (!address || address[2] !== row) continue;
        const cell = sheet[key];
        const text = cell ? cell.w : undefined;
        const isCleaning = typeof text === 'string' && text.startsWith("úklid");
        slots[columnToIndex(address[1])] = isCleaning ? null : text;
      }
      // Columns B..K; missing cells stay empty and serialize as null.
      daySchedule[headerMatch[1]] = slots.slice(1, 11);
    }
    return daySchedule;
  });

  const data = {
    schedule,
    props: upcomingSheets.map(({ name, isoDate }) => ({
      date: isoDate, // ISO yyyy-mm-dd string for easy use
      priprava: /priprava/i.test(name),
    })),
  };

  // The db directory is git-ignored, so it may not exist on a fresh checkout.
  fs.mkdirSync('db', { recursive: true });
  fs.writeFileSync('db/current.json', JSON.stringify(data));
}
module.exports = parseThisShit;

133
scrape/scraper.js Normal file
View File

@@ -0,0 +1,133 @@
const puppeteer = require('puppeteer');
const path = require('path');
const fs = require('fs');
const parseThisShit = require('./parse');
// Credentials typed into the login form; both come from the environment.
const { EMAIL, PASSWORD } = process.env;
// Alternative (OneDrive) URL, currently disabled:
//const SHAREPOINT_URL = 'https://onedrive.live.com/personal/7d8c4d9baeeebde3/_layouts/15/doc2.aspx?resid=2bddf9b7-8613-4ae3-a684-0be6d73d90bf&cid=7d8c4d9baeeebde3&ct=1748937302474&wdOrigin=OFFICECOM-WEB.START.UPLOAD&wdPreviousSessionSrc=HarmonyWeb&wdPreviousSession=ce7df0ab-aade-4df2-9e2e-492e99049666';
// Share link of the workbook that the scraper opens.
const SHAREPOINT_URL = 'https://spsejecnacz.sharepoint.com/:x:/s/nastenka/EbA_RcWKRdRNlB8YU1iuWM4BnMetCQlVm8toHuuyW-TPyA?e=uu3iPR&CID=2686cea0-2d06-3304-4519-087fb9e06fd0';
// Directory handed to puppeteer as the browser's userDataDir.
const VOLUME_PATH = path.resolve('./volume/browser');
/**
 * Empty the `./downloads` folder by deleting it (with its contents) and
 * creating it again. Failures are logged and not propagated.
 *
 * @returns {Promise<void>}
 */
async function clearDownloadsFolder() {
  const downloadsDir = './downloads';
  const removeOptions = { recursive: true, force: true };
  try {
    await fs.promises.rm(downloadsDir, removeOptions);
    await fs.promises.mkdir(downloadsDir);
  } catch (failure) {
    console.error('Error:', failure);
  }
}
/**
 * Scraper entry point.
 *
 * Opens the shared workbook in a headless browser (logging in first when
 * redirected to a login page), triggers File > Create a Copy > Download a Copy,
 * waits for the .xlsx to appear in ./downloads, parses it and clears the
 * download folder again.
 *
 * On any failure the error is logged and the process exit code is set to 1;
 * the browser is closed in every case.
 */
(async () => {
  // Helper: get newest .xlsx file in downloads folder
  function getNewestFile(dir, ext = '.xlsx') {
    const files = fs.readdirSync(dir)
      .filter(f => f.endsWith(ext))
      .map(f => ({
        name: f,
        time: fs.statSync(path.join(dir, f)).mtime.getTime()
      }))
      .sort((a, b) => b.time - a.time);
    return files.length ? path.join(dir, files[0].name) : null;
  }

  // Helper: poll the download folder until a file with the extension shows up.
  // Checking once after a fixed sleep fails on slow downloads and wastes time
  // on fast ones.
  async function waitForDownload(dir, ext = '.xlsx', timeout = 40000) {
    const deadline = Date.now() + timeout;
    for (;;) {
      const newest = getNewestFile(dir, ext);
      if (newest) return newest;
      if (Date.now() > deadline) {
        throw new Error('No XLSX file found in download folder');
      }
      await new Promise(r => setTimeout(r, 500));
    }
  }

  let browser;
  try {
    // Start from an empty folder so a workbook left behind by a failed run can
    // never be mistaken for the fresh download.
    await clearDownloadsFolder();
    const downloadPath = path.resolve('./downloads');
    if (!fs.existsSync(downloadPath)) fs.mkdirSync(downloadPath);

    browser = await puppeteer.launch({
      headless: 'new',
      //headless: false,
      userDataDir: VOLUME_PATH
    });
    const [page] = await browser.pages();
    const client = await page.target().createCDPSession();
    await client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: downloadPath,
    });

    await page.goto(SHAREPOINT_URL, { waitUntil: 'networkidle2' });
    await new Promise(r => setTimeout(r, 3000));

    if (page.url().includes('login.')) {
      console.log('Logging in...');
      if (!EMAIL || !PASSWORD) {
        throw new Error('EMAIL and PASSWORD environment variables are required to log in');
      }
      await page.waitForSelector('input[type="email"]', { timeout: 10000 });
      await page.type('input[type="email"]', EMAIL, { delay: 50 });
      await page.keyboard.press('Enter');
      await new Promise(r => setTimeout(r, 2000));
      await page.waitForSelector('input[type="password"]', { timeout: 10000 });
      await page.type('input[type="password"]', PASSWORD, { delay: 50 });
      await page.keyboard.press('Enter');
      try {
        await page.waitForSelector('input[type="submit"]', { timeout: 10000 });
        await page.click('input[type="submit"]');
      } catch {
        console.log('No stay signed in prompt.');
      }
      // wait for navigation after login
      await page.waitForNavigation({ waitUntil: 'networkidle2' });
    }

    // Wait for iframe containing file options
    const frameHandle = await page.waitForSelector('iframe');
    const frame = await frameHandle.contentFrame();
    await frame.waitForSelector('button[title="File"]', { timeout: 60000 });
    await frame.click('button[title="File"]');
    await new Promise(r => setTimeout(r, 500));

    // Click "Create a Copy"; if the menu did not open, click "File" once more
    try {
      await frame.waitForSelector('div[role="menuitem"][name="Create a Copy"]', { visible: true });
    } catch {
      await frame.click('button[title="File"]');
      await new Promise(r => setTimeout(r, 500));
    }
    await frame.click('div[role="menuitem"][name="Create a Copy"]');
    await new Promise(r => setTimeout(r, 500));

    // Click "Download a Copy"
    await frame.waitForSelector('div[role="menuitem"][name="Download a Copy"]', { visible: true });
    await frame.click('div[role="menuitem"][name="Download a Copy"]');

    // Wait for the downloaded file
    console.log('Waiting for file:', downloadPath);
    const downloadedFilePath = await waitForDownload(downloadPath, '.xlsx');
    parseThisShit(downloadedFilePath);
    await clearDownloadsFolder();
  } catch (err) {
    console.error('Scraper failed:', err);
    // Non-zero exit lets the parent process (cron-runner) see the failure.
    process.exitCode = 1;
  } finally {
    if (browser) await browser.close();
  }
})();

27
server.js Normal file
View File

@@ -0,0 +1,27 @@
const express = require("express");
const path = require("path");
const app = express();
const fs = require("fs/promises");
const PORT = 3000;
// GET / — send the db/current.json file located next to this script.
app.get('/', (_req, res) => {
  const scheduleFile = path.join(__dirname, "db", "current.json");
  res.sendFile(scheduleFile);
});
// GET /status — report the state stored in volume/customState.json.
// Responds with {working: true} when the stored flag is truthy, otherwise with
// the stored flag and message. If the state file is missing or is not valid
// JSON, the endpoint still answers in JSON (HTTP 500) instead of falling
// through to the framework's default error page.
app.get("/status", async (_, res) => {
  try {
    const dataStr = await fs.readFile(path.resolve("./volume/customState.json"), { encoding: "utf8" });
    const data = JSON.parse(dataStr);
    if (data.working) {
      res.json({ working: true });
    } else {
      res.json({ working: data.working, message: data.message });
    }
  } catch (err) {
    console.error("Failed to read status file:", err);
    res.status(500).json({ working: false, message: "Status unavailable" });
  }
});
// TODO: Reporting errors
// Start the HTTP server and log the local URL once it is listening.
app.listen(PORT, function onListening() {
  console.log(`Server is running at http://localhost:${PORT}`);
});

4
volume/customState.json Normal file
View File

@@ -0,0 +1,4 @@
{
"working": true,
"message": ""
}