Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:58:00 +08:00
commit ef9c3b3679
11 changed files with 1118 additions and 0 deletions

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env node
/**
* Check if an element exists on a page and return its properties
* Usage: node check-element.js <url> <selector>
* Returns: Compact JSON with element info
*/
const { chromium } = require('playwright');
/**
 * Open a page, look up the first element matching a selector and summarize it.
 *
 * @param {string} url - Address handed to `page.goto`.
 * @param {string} selector - Selector handed to `page.$`.
 * @returns {Promise<object>} One of:
 *   - `{ found: true, tagName, text, visible, enabled, attributes }` for a match
 *     (`text` is capped at 100 characters),
 *   - `{ found: false, selector }` when nothing matches,
 *   - `{ found: false, error }` when opening or inspecting the page fails
 *     (`error` is capped at 100 characters).
 */
async function checkElement(url, selector) {
  const browser = await chromium.launch({ headless: true });
  try {
    // Page creation lives inside the try so a failure here still reaches the
    // finally block and the launched browser is not left running.
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    // Wait briefly for dynamic content
    await page.waitForTimeout(1000);

    const element = await page.$(selector);
    if (!element) {
      return { found: false, selector };
    }

    // The callback runs against the matched element; extract only essential
    // information. `return await` keeps a rejection inside this try/catch.
    return await element.evaluate((el) => {
      const rect = el.getBoundingClientRect();
      return {
        found: true,
        tagName: el.tagName,
        text: el.textContent?.trim().substring(0, 100) || '',
        visible: rect.width > 0 && rect.height > 0,
        enabled: !el.disabled,
        attributes: {
          id: el.id || null,
          class: el.className || null,
          type: el.type || null,
          href: el.href || null,
          value: el.value || null,
        },
      };
    });
  } catch (error) {
    // Tolerate thrown values that are not Error instances.
    return {
      found: false,
      error: String(error?.message ?? error).substring(0, 100),
    };
  } finally {
    try {
      await browser.close();
    } catch {
      // Best-effort cleanup: a failing close must not replace the result
      // (or the error report) that has already been produced above.
    }
  }
}
// Command-line entry point: only active when this file is run directly.
if (require.main === module) {
  const cliArgs = process.argv.slice(2);

  if (cliArgs.length < 2) {
    const usage = { error: 'Usage: node check-element.js <url> <selector>' };
    console.log(JSON.stringify(usage));
    process.exit(1);
  }

  const url = cliArgs[0];
  const selector = cliArgs[1];

  // Print the lookup result as indented JSON; any rejection (including one
  // raised while printing) is reported as an { error } object instead.
  (async () => {
    try {
      const outcome = await checkElement(url, selector);
      console.log(JSON.stringify(outcome, null, 2));
    } catch (failure) {
      console.log(JSON.stringify({ error: failure.message }));
    }
  })();
}

module.exports = { checkElement };

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env node
/**
* Extract text content from a specific element or entire page
* Usage: node get-text.js <url> [selector]
* If selector is omitted, extracts visible text from body
* Returns: Compact JSON with extracted text
*/
const { chromium } = require('playwright');
/**
 * Extract whitespace-normalized text from one element or from the whole page.
 *
 * @param {string} url - Address handed to `page.goto`.
 * @param {?string} [selector=null] - Selector handed to `page.$`; when falsy
 *   the text is collected from the text nodes under `document.body`.
 * @returns {Promise<object>} One of:
 *   - `{ found: true, text, length, truncated }` where `text` is capped at
 *     2000 characters and `length` is the size before capping,
 *   - `{ found: false, selector }` when the selector matches nothing,
 *   - `{ found: false, error }` on failure (`error` capped at 100 characters).
 */
async function getText(url, selector = null) {
  const MAX_TEXT_LENGTH = 2000;
  const browser = await chromium.launch({ headless: true });
  try {
    // Page creation lives inside the try so a failure here still reaches the
    // finally block and the launched browser is not left running.
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForTimeout(1000);

    let text;
    if (selector) {
      const element = await page.$(selector);
      if (!element) {
        return { found: false, selector };
      }
      // textContent() can resolve to null; treat that as empty text rather
      // than letting `.trim()` throw and report the element as not found.
      text = (await element.textContent()) ?? '';
    } else {
      // Extract visible text from body, excluding script/style tags.
      // Everything in this callback must be self-contained: it is evaluated
      // in the page, not in this Node process.
      text = await page.evaluate(() => {
        const skippedTags = ['script', 'style', 'noscript'];

        // `display` is not inherited, so an element can compute to "block"
        // while an ancestor is "none". Walk up the tree, caching per element
        // so each one is inspected only once.
        const displayNoneCache = new Map();
        const isDisplayNone = (el) => {
          if (!el) return false;
          let hidden = displayNoneCache.get(el);
          if (hidden === undefined) {
            hidden =
              window.getComputedStyle(el).display === 'none' ||
              isDisplayNone(el.parentElement);
            displayNoneCache.set(el, hidden);
          }
          return hidden;
        };

        const walker = document.createTreeWalker(
          document.body,
          NodeFilter.SHOW_TEXT,
          {
            acceptNode: (node) => {
              const parent = node.parentElement;
              if (!parent) return NodeFilter.FILTER_REJECT;
              if (skippedTags.includes(parent.tagName.toLowerCase())) {
                return NodeFilter.FILTER_REJECT;
              }
              // `visibility` is inherited, so the direct parent is enough.
              if (window.getComputedStyle(parent).visibility === 'hidden') {
                return NodeFilter.FILTER_REJECT;
              }
              if (isDisplayNone(parent)) {
                return NodeFilter.FILTER_REJECT;
              }
              return NodeFilter.FILTER_ACCEPT;
            },
          }
        );

        let collected = '';
        for (let node = walker.nextNode(); node; node = walker.nextNode()) {
          collected += node.textContent + ' ';
        }
        return collected;
      });
    }

    // Clean and truncate text
    const cleaned = text.trim().replace(/\s+/g, ' ');
    return {
      found: true,
      text: cleaned.substring(0, MAX_TEXT_LENGTH),
      length: cleaned.length,
      truncated: cleaned.length > MAX_TEXT_LENGTH,
    };
  } catch (error) {
    // Tolerate thrown values that are not Error instances.
    return {
      found: false,
      error: String(error?.message ?? error).substring(0, 100),
    };
  } finally {
    try {
      await browser.close();
    } catch {
      // Best-effort cleanup: a failing close must not replace the result
      // (or the error report) that has already been produced above.
    }
  }
}
// Command-line entry point: only active when this file is run directly.
if (require.main === module) {
  const cliArgs = process.argv.slice(2);

  if (cliArgs.length < 1) {
    const usage = { error: 'Usage: node get-text.js <url> [selector]' };
    console.log(JSON.stringify(usage));
    process.exit(1);
  }

  // The selector is optional and stays undefined when it was not supplied.
  const url = cliArgs[0];
  const selector = cliArgs[1];

  // Print the extraction result as indented JSON; any rejection (including
  // one raised while printing) is reported as an { error } object instead.
  (async () => {
    try {
      const outcome = await getText(url, selector);
      console.log(JSON.stringify(outcome, null, 2));
    } catch (failure) {
      console.log(JSON.stringify({ error: failure.message }));
    }
  })();
}

module.exports = { getText };

View File

@@ -0,0 +1,112 @@
#!/usr/bin/env node
/**
* Navigate to a page and extract structured data
* Usage: node navigate-and-extract.js <url> <extraction-config-json>
* Config format: {"selectors": {"name": "selector", ...}, "waitFor": "selector"}
* Returns: Compact JSON with extracted data
*/
const { chromium } = require('playwright');
/**
 * Open a page and collect text, element counts and visibility flags from it.
 *
 * @param {string} url - Address handed to `page.goto`.
 * @param {object} config - Extraction settings:
 *   - `waitFor` (optional): selector to wait for (up to 10 s) before
 *     extracting; when absent a fixed 1 s pause is used instead.
 *   - `selectors` (optional): `{ name: selector }`; each name maps to the
 *     trimmed text of the first match (capped at 200 characters) or `null`.
 *   - `counts` (optional): `{ name: selector }`; each name maps to the number
 *     of matches, or `0` if the lookup fails.
 *   - `checks` (optional): `{ name: selector }`; each name maps to whether the
 *     first match exists and `isVisible()` resolves truthy.
 *   All three maps write into the same result object, so a name repeated in a
 *   later map overwrites the earlier value.
 * @returns {Promise<object>} `{ success: true, url, data }`, or
 *   `{ success: false, error }` on failure (`error` capped at 100 characters).
 */
async function navigateAndExtract(url, config) {
  const browser = await chromium.launch({ headless: true });
  try {
    // Page creation lives inside the try so a failure here still reaches the
    // finally block and the launched browser is not left running.
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });

    // Wait for specific element if specified
    if (config.waitFor) {
      // Deliberately best-effort: if the element never shows up, extraction
      // still runs and the affected entries simply come back empty.
      await page.waitForSelector(config.waitFor, { timeout: 10000 }).catch(() => {});
    } else {
      await page.waitForTimeout(1000);
    }

    const results = {};

    // Extract data for each selector
    for (const [name, selector] of Object.entries(config.selectors || {})) {
      try {
        const element = await page.$(selector);
        if (element) {
          const text = await element.textContent();
          results[name] = text.trim().substring(0, 200);
        } else {
          results[name] = null;
        }
      } catch (e) {
        // One bad selector (or a null textContent) must not abort the rest.
        results[name] = null;
      }
    }

    // Count elements if requested
    if (config.counts) {
      for (const [name, selector] of Object.entries(config.counts)) {
        try {
          const matches = await page.$$(selector);
          results[name] = matches.length;
        } catch (e) {
          results[name] = 0;
        }
      }
    }

    // Check for element visibility if requested
    if (config.checks) {
      for (const [name, selector] of Object.entries(config.checks)) {
        try {
          const element = await page.$(selector);
          results[name] = element !== null && (await element.isVisible());
        } catch (e) {
          results[name] = false;
        }
      }
    }

    return {
      success: true,
      url,
      data: results,
    };
  } catch (error) {
    // Tolerate thrown values that are not Error instances.
    return {
      success: false,
      error: String(error?.message ?? error).substring(0, 100),
    };
  } finally {
    try {
      await browser.close();
    } catch {
      // Best-effort cleanup: a failing close must not replace the result
      // (or the error report) that has already been produced above.
    }
  }
}
// Command-line entry point: only active when this file is run directly.
if (require.main === module) {
  const cliArgs = process.argv.slice(2);

  if (cliArgs.length < 2) {
    const usage = { error: 'Usage: node navigate-and-extract.js <url> <config-json>' };
    console.log(JSON.stringify(usage));
    process.exit(1);
  }

  const url = cliArgs[0];
  const rawConfig = cliArgs[1];

  // The second argument must be a JSON document describing what to extract.
  let config;
  try {
    config = JSON.parse(rawConfig);
  } catch (parseFailure) {
    const report = { error: 'Invalid JSON config: ' + parseFailure.message };
    console.log(JSON.stringify(report));
    process.exit(1);
  }

  // Print the extraction result as indented JSON; any rejection (including
  // one raised while printing) is reported as an { error } object instead.
  (async () => {
    try {
      const outcome = await navigateAndExtract(url, config);
      console.log(JSON.stringify(outcome, null, 2));
    } catch (failure) {
      console.log(JSON.stringify({ error: failure.message }));
    }
  })();
}

module.exports = { navigateAndExtract };

View File

@@ -0,0 +1,11 @@
{
"name": "browser-automation-scripts",
"version": "1.0.0",
"description": "Browser automation scripts using Playwright",
"scripts": {
"install-browsers": "playwright install chromium"
},
"dependencies": {
"playwright": "^1.40.0"
}
}

View File

@@ -0,0 +1,70 @@
#!/usr/bin/env node
/**
* Take a screenshot of a page or specific element
* Usage: node take-screenshot.js <url> <output-path> [selector]
* If selector is provided, captures only that element
* Returns: Compact JSON with screenshot info
*/
const { chromium } = require('playwright');
const path = require('path');
/**
 * Capture a screenshot of a whole page or of a single element.
 *
 * @param {string} url - Address handed to `page.goto`.
 * @param {string} outputPath - Destination file; resolved to an absolute path
 *   with `path.resolve` before it is passed to the screenshot call.
 * @param {?string} [selector=null] - When truthy, only the first element
 *   matching it is captured; otherwise a full-page screenshot is taken.
 * @returns {Promise<object>} `{ success: true, path, url, selector }` (with
 *   `selector` set to `'full-page'` when none was given), or
 *   `{ success: false, error }` when the element is missing or the capture
 *   fails (failure messages are capped at 100 characters).
 */
async function takeScreenshot(url, outputPath, selector = null) {
  const browser = await chromium.launch({ headless: true });
  try {
    // Page creation lives inside the try so a failure here still reaches the
    // finally block and the launched browser is not left running.
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForTimeout(1000);

    const absolutePath = path.resolve(outputPath);

    if (selector) {
      const element = await page.$(selector);
      if (!element) {
        return { success: false, error: `Element not found: ${selector}` };
      }
      await element.screenshot({ path: absolutePath });
    } else {
      await page.screenshot({ path: absolutePath, fullPage: true });
    }

    return {
      success: true,
      path: absolutePath,
      url,
      selector: selector || 'full-page',
    };
  } catch (error) {
    // Tolerate thrown values that are not Error instances.
    return {
      success: false,
      error: String(error?.message ?? error).substring(0, 100),
    };
  } finally {
    try {
      await browser.close();
    } catch {
      // Best-effort cleanup: a failing close must not replace the result
      // (or the error report) that has already been produced above.
    }
  }
}
// Command-line entry point: only active when this file is run directly.
if (require.main === module) {
  const cliArgs = process.argv.slice(2);

  if (cliArgs.length < 2) {
    const usage = { error: 'Usage: node take-screenshot.js <url> <output-path> [selector]' };
    console.log(JSON.stringify(usage));
    process.exit(1);
  }

  // The selector is optional and stays undefined when it was not supplied.
  const url = cliArgs[0];
  const outputPath = cliArgs[1];
  const selector = cliArgs[2];

  // Print the capture result as indented JSON; any rejection (including one
  // raised while printing) is reported as an { error } object instead.
  (async () => {
    try {
      const outcome = await takeScreenshot(url, outputPath, selector);
      console.log(JSON.stringify(outcome, null, 2));
    } catch (failure) {
      console.log(JSON.stringify({ error: failure.message }));
    }
  })();
}

module.exports = { takeScreenshot };