diff --git a/.github/workflows/blog-syndication.yml b/.github/workflows/blog-syndication.yml new file mode 100644 index 0000000000..dee7470bb5 --- /dev/null +++ b/.github/workflows/blog-syndication.yml @@ -0,0 +1,112 @@ +name: Syndicate Blog Posts + +on: + schedule: + # Daily at 13:00 UTC. Runs from the default branch only, per GitHub's cron rules. + - cron: '0 13 * * *' + workflow_dispatch: + inputs: + dry_run: + description: 'Skip API calls and only print what would happen.' + type: boolean + default: false + +permissions: + contents: write + +concurrency: + group: blog-syndication + cancel-in-progress: false + +jobs: + syndicate: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + with: + # Token with write scope so the post-run commit can push the state file. + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Run API syndication script + id: syndicate_api + env: + DEVTO_API_KEY: ${{ secrets.DEVTO_API_KEY }} + HASHNODE_TOKEN: ${{ secrets.HASHNODE_TOKEN }} + HASHNODE_PUBLICATION_ID: ${{ secrets.HASHNODE_PUBLICATION_ID }} + run: | + set -euo pipefail + if [ "${{ inputs.dry_run }}" = "true" ]; then + python3 scripts/website/syndicate_blog_posts.py --dry-run + else + python3 scripts/website/syndicate_blog_posts.py + fi + + - name: Detect browser-syndication credentials + id: browser_creds + env: + FOOJAY_USER: ${{ secrets.FOOJAY_USER }} + run: | + if [ -n "${FOOJAY_USER}" ]; then + echo "any_configured=true" >> "${GITHUB_OUTPUT}" + else + echo "any_configured=false" >> "${GITHUB_OUTPUT}" + fi + + - name: Install markdown package (for browser-extension queue HTML) + run: | + set -euo pipefail + pip install markdown + + - name: Install Playwright dependencies + if: ${{ steps.browser_creds.outputs.any_configured == 'true' }} + run: | + set -euo pipefail + pip install playwright + playwright install --with-deps chromium + + - name: Run browser syndication script + if: ${{ steps.browser_creds.outputs.any_configured == 'true' }} + env: + FOOJAY_USER: ${{ secrets.FOOJAY_USER }} + FOOJAY_PASSWORD: ${{ secrets.FOOJAY_PASSWORD }} + run: | + set -euo pipefail + if [ "${{ inputs.dry_run }}" = "true" ]; then + python3 scripts/website/syndicate_browser_posts.py --dry-run + else + python3 scripts/website/syndicate_browser_posts.py + fi + + - name: Queue browser-extension syndication tasks (Medium, DZone) + run: | + set -euo pipefail + python3 scripts/website/queue_browser_syndication.py + + - name: Upload syndication screenshots on failure + if: ${{ always() && hashFiles('docs/website/reports/syndication-screenshots/**/*.png') != '' }} + uses: actions/upload-artifact@v4 + with: + name: syndication-screenshots + path: docs/website/reports/syndication-screenshots/ + if-no-files-found: ignore + retention-days: 14 + + - name: Commit updated syndication state and queue + if: ${{ inputs.dry_run != true }} + run: | + set -euo pipefail + if git diff --quiet -- scripts/website/syndication-state.json scripts/website/syndication-queue.json; then + echo "No state or queue changes to commit." 
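+            # Nothing to push: exit successfully and keep the scheduled run green.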
+ exit 0 + fi + git config user.name 'github-actions[bot]' + git config user.email 'github-actions[bot]@users.noreply.github.com' + git add scripts/website/syndication-state.json scripts/website/syndication-queue.json + git commit -m "ci: record blog syndication results" + git push diff --git a/.gitignore b/.gitignore index 82e5eba372..89ef591e4d 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,8 @@ **/dist/* *.zip CodenameOneDesigner/src/version.properties +*-storage-state.json +*-storage-state.*.json /Ports/iOSPort/build/ /Ports/iOSPort/dist/ Ports/iOSPort/nbproject/private/private.xml diff --git a/docs/website/reports/syndication-screenshots/.gitignore b/docs/website/reports/syndication-screenshots/.gitignore new file mode 100644 index 0000000000..5b502f9bd1 --- /dev/null +++ b/docs/website/reports/syndication-screenshots/.gitignore @@ -0,0 +1,2 @@ +*.png +!.gitignore diff --git a/scripts/syndication-extension/README.md b/scripts/syndication-extension/README.md new file mode 100644 index 0000000000..bbfe4e9a5d --- /dev/null +++ b/scripts/syndication-extension/README.md @@ -0,0 +1,97 @@ +# Codename One Syndicator (Firefox extension) + +Drives the Medium and DZone post editors from inside the user's logged-in +Firefox session, so syndication requests carry a real browser fingerprint +and `cf_clearance` cookie. This is the only way to syndicate to Medium / +DZone reliably — both sit behind aggressive Cloudflare bot detection that +rejects headless Playwright runs. + +## How it fits together + +``` + ┌───────────────────────────┐ ┌──────────────────────────┐ + │ Daily CI cron │ │ User's Firefox │ + │ blog-syndication.yml │ │ (this extension) │ + │ │ │ │ + │ 1. picks eligible posts │ │ 1. polls queue every │ + │ 2. publishes via APIs │ │ 30 min │ + │ (foojay, dev.to, │ │ 2. opens editor tab │ + │ hashnode) │ │ per pending task │ + │ 3. appends Medium/DZone │ ──────▶ │ 3. content script │ + │ tasks to │ poll │ fills editor + │ + │ syndication-queue │ via │ saves draft │ + │ .json (committed) │ raw.gh │ 4. shows JSON patch │ + └───────────────────────────┘ │ to paste back into │ + │ syndication-state │ + └──────────────────────────┘ +``` + +* CI does not run the browser. It only knows which posts are eligible and + appends a task entry per browser-only platform to + `scripts/website/syndication-queue.json`. That commit is what makes the + task visible to the extension. +* The extension polls the raw GitHub URL of that file. When the user's + Firefox is online, queued tasks get processed. There is no daily + schedule pressure — a 3-day Firefox-offline gap is fine. +* The extension writes results into its local `chrome.storage` and the + popup UI prints a JSON patch the user can paste into + `scripts/website/syndication-state.json` to record the syndication + permanently. (Round-tripping the result via a GitHub PR from inside the + extension would require a committed token; we keep that boundary simple.) + +## Install (Firefox) + +1. `about:debugging#/runtime/this-firefox` → **Load Temporary Add-on…** +2. Pick `scripts/syndication-extension/manifest.json`. +3. The icon shows up in the toolbar. Click it → **Poll syndication queue + now** to test against whatever is currently in + `syndication-queue.json`. + +For permanent install (across browser restarts) the extension needs to +be signed by Mozilla — out of scope for the first version. + +## Adapters + +Each target site has a content script that runs on its editor URL: + +* `adapters/medium.js` — Medium new-story editor. 
Types title, presses
+  Enter, pastes body HTML via `execCommand('insertHTML')`, opens Story
+  Settings panel, fills canonical URL.
+* `adapters/dzone.js` — DZone Froala editor. Sets title and subtitle via
+  React-style native value setters, calls
+  `FroalaEditor.INSTANCES[0].html.set` for the body, clicks **Save draft**.
+
+To add a new platform (Bluesky, Mastodon, Threads, …):
+
+1. Add an entry under `EDITOR_URLS` in `background.js`.
+2. Drop a new `adapters/<site>.js` content script that reads the task
+   from `chrome.storage.local['task_for_<site>']` and reports back via
+   `cn1Syndicator.report(taskId, { success, url })`.
+3. Add a `content_scripts` entry in `manifest.json` for that editor URL.
+4. Have CI append `{ "site": "<site>", … }` task entries.
+
+## Producing the queue (CI side)
+
+`scripts/website/queue_browser_syndication.py` walks the same
+eligible-posts logic the API syndicator uses, then appends a task per
+browser platform to `scripts/website/syndication-queue.json` (skipping
+anything already in `syndication-state.json` or already in the queue).
+
+The daily workflow runs:
+
+```bash
+python3 scripts/website/queue_browser_syndication.py --platforms medium,dzone
+```
+
+then commits the queue file back to master so the next extension poll
+picks it up.
+
+## Caveats
+
+* The extension is unsigned, so a temporary install must be re-loaded
+  after every Firefox restart unless you self-sign or run from a
+  Developer Edition with `xpinstall.signatures.required` disabled.
+* Adapter selectors break when target sites redesign. Each adapter is a
+  small, scoped file — fix the broken selectors and reload the extension.
+* The queue is durable because it lives in the repo. A 3-day
+  Firefox-offline gap just means the tasks process when the user is back.
diff --git a/scripts/syndication-extension/adapters/common.js b/scripts/syndication-extension/adapters/common.js
new file mode 100644
index 0000000000..d24aa06e8a
--- /dev/null
+++ b/scripts/syndication-extension/adapters/common.js
@@ -0,0 +1,61 @@
+// Helpers shared by every adapter.
+
+window.cn1Syndicator = window.cn1Syndicator || {};
+
+window.cn1Syndicator.waitFor = function (predicate, { timeout = 30000, interval = 200 } = {}) {
+  return new Promise((resolve, reject) => {
+    const deadline = Date.now() + timeout;
+    const tick = () => {
+      try {
+        const value = predicate();
+        if (value) {
+          resolve(value);
+          return;
+        }
+      } catch (err) {
+        // ignore until timeout
+      }
+      if (Date.now() > deadline) {
+        reject(new Error(`waitFor timed out after ${timeout}ms`));
+        return;
+      }
+      setTimeout(tick, interval);
+    };
+    tick();
+  });
+};
+
+window.cn1Syndicator.setReactValue = function (element, value) {
+  if (!element) return false;
+  const proto = Object.getPrototypeOf(element);
+  const setter = Object.getOwnPropertyDescriptor(proto, "value").set;
+  setter.call(element, value);
+  element.dispatchEvent(new Event("input", { bubbles: true }));
+  return true;
+};
+
+window.cn1Syndicator.report = function (taskId, payload) {
+  chrome.runtime.sendMessage({ type: "syndication-complete", task_id: taskId, ...payload });
+};
+
+window.cn1Syndicator.getTaskFor = async function (site) {
+  const key = `task_for_${site}`;
+  const data = await chrome.storage.local.get(key);
+  return data[key] || null;
+};
+
+window.cn1Syndicator.downloadAsFile = async function (url, fileName) {
+  // Returns a File object suitable for handing to a hidden file input.
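+  // Not used by the bundled Medium/DZone adapters yet; intended for future
+  // adapters that must re-upload an image (e.g. a task's cover_image_url)
+  // through a hidden file input via attachFile() below.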
+  const resp = await fetch(url);
+  if (!resp.ok) throw new Error(`download ${url} -> ${resp.status}`);
+  const blob = await resp.blob();
+  return new File([blob], fileName, { type: blob.type || "image/jpeg" });
+};
+
+window.cn1Syndicator.attachFile = function (input, file) {
+  // Programmatically populate a hidden <input type="file">.
+  const dt = new DataTransfer();
+  dt.items.add(file);
+  input.files = dt.files;
+  input.dispatchEvent(new Event("change", { bubbles: true }));
+};
diff --git a/scripts/syndication-extension/adapters/dzone.js b/scripts/syndication-extension/adapters/dzone.js
new file mode 100644
index 0000000000..18a5a241b7
--- /dev/null
+++ b/scripts/syndication-extension/adapters/dzone.js
@@ -0,0 +1,52 @@
+// DZone adapter. Runs on https://dzone.com/content/article/post.html.
+//
+// DZone's editor is Froala. Title is a textarea (Angular-bound), body lives
+// in window.FroalaEditor.INSTANCES[0]. The save mechanism is the "Save draft"
+// button — Cloudflare doesn't challenge it because the request originates
+// from the user's already-trusted browser session.
+
+(async () => {
+  const { waitFor, setReactValue, report, getTaskFor } = window.cn1Syndicator;
+  const task = await getTaskFor("dzone");
+  if (!task) return;
+  console.log("[dzone-adapter] picked up task", task.slug);
+
+  try {
+    // Title (Angular ng-model)
+    const title = await waitFor(() => document.querySelector("textarea[name='title']"));
+    setReactValue(title, task.title);
+
+    // Subtitle / TL;DR — use description if present
+    if (task.description) {
+      const sub = document.querySelector("textarea[name='subtitle']");
+      if (sub) setReactValue(sub, task.description.slice(0, 300));
+      const meta = document.getElementById("meta-description-textarea");
+      if (meta) setReactValue(meta, task.description.slice(0, 155));
+    }
+
+    // Body — set via Froala's JS API
+    if (task.body_html) {
+      await waitFor(() => window.FroalaEditor && window.FroalaEditor.INSTANCES && window.FroalaEditor.INSTANCES.length);
+      const inst = window.FroalaEditor.INSTANCES[0];
+      inst.html.set(task.body_html);
+      if (inst.events && inst.events.trigger) inst.events.trigger("contentChanged");
+    }
+
+    // Wait a moment for Angular to digest the title and subtitle changes
+    // before clicking Save.
+    await new Promise((r) => setTimeout(r, 1500));
+
+    const save = Array.from(document.querySelectorAll("button"))
+      .find((b) => /^save\s*draft$/i.test((b.textContent || "").trim()));
+    if (!save) throw new Error("Save Draft button not found");
+    save.click();
+
+    // After save, DZone keeps you on post.html or redirects to drafts list.
+    // Wait a few seconds then report.
+    await new Promise((r) => setTimeout(r, 6000));
+    report(task.id, { success: true, url: location.href });
+  } catch (err) {
+    console.error("[dzone-adapter] failed", err);
+    report(task.id, { success: false, error: String(err) });
+  }
+})();
diff --git a/scripts/syndication-extension/adapters/medium.js b/scripts/syndication-extension/adapters/medium.js
new file mode 100644
index 0000000000..b852b850a4
--- /dev/null
+++ b/scripts/syndication-extension/adapters/medium.js
@@ -0,0 +1,61 @@
+// Medium adapter. Runs on https://medium.com/new-story.
+//
+// Medium has a single contenteditable div for both title and body. Type the
+// title, press Enter, then paste body HTML via the document selection API
+// (Medium's editor accepts HTML pastes and converts to its internal format).
+// Set canonical via the Story Settings panel after the body is in place.
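+//
+// NOTE: document.execCommand() is deprecated. It still works inside
+// contenteditable editors in current browsers, but if it ever goes away the
+// insertText / insertHTML calls below will need a ClipboardEvent-based fallback.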
+
+(async () => {
+  const { waitFor, setReactValue, report, getTaskFor } = window.cn1Syndicator;
+  const task = await getTaskFor("medium");
+  if (!task) return;
+  console.log("[medium-adapter] picked up task", task.slug);
+
+  try {
+    const editor = await waitFor(() => document.querySelector("div.postArticle-content[contenteditable='true']"));
+    editor.focus();
+    document.execCommand("selectAll", false);
+    document.execCommand("delete", false);
+
+    // Type the title (Medium converts the first line to an <h1>)
+    document.execCommand("insertText", false, task.title);
+    document.execCommand("insertParagraph", false);
+
+    // Paste body as HTML so headings/images/code render.
+    if (task.body_html) {
+      // execCommand insertHTML works in Medium's contenteditable.
+      document.execCommand("insertHTML", false, task.body_html);
+    }
+
+    // Wait for Medium's auto-save to assign a draft URL (/p/<id>/edit).
+    await new Promise((r) => setTimeout(r, 4000));
+    let draftUrl = location.href;
+
+    // Story settings panel: click the gear/settings icon in the top bar
+    // (varies by layout — try a couple of selectors), find the
+    // "Customize canonical link" / canonical URL input, fill it.
+    try {
+      const gear = document.querySelector("button[aria-label*='Story settings' i], button[data-action='show-story-meta']");
+      if (gear) {
+        gear.click();
+        const canonical = await waitFor(
+          () => document.querySelector("input[placeholder*='canonical' i], input[placeholder*='URL of original' i]"),
+          { timeout: 8000 }
+        );
+        setReactValue(canonical, task.canonical);
+        // Close the panel so auto-save fires
+        document.body.dispatchEvent(new KeyboardEvent("keydown", { key: "Escape", bubbles: true }));
+      }
+    } catch (err) {
+      console.warn("[medium-adapter] could not set canonical via panel", err);
+    }
+
+    // One last wait so auto-save settles after the canonical change.
+    await new Promise((r) => setTimeout(r, 4000));
+    draftUrl = location.href;
+    report(task.id, { success: true, url: draftUrl });
+  } catch (err) {
+    console.error("[medium-adapter] failed", err);
+    report(task.id, { success: false, error: String(err) });
+  }
+})();
diff --git a/scripts/syndication-extension/background.js b/scripts/syndication-extension/background.js
new file mode 100644
index 0000000000..c748813ae6
--- /dev/null
+++ b/scripts/syndication-extension/background.js
@@ -0,0 +1,124 @@
+// Background service worker for the Codename One Syndicator extension.
+//
+// Polls a JSON queue file in the repo at a slow cadence (default 30 min) and,
+// for each pending task, opens the relevant editor in a new tab. Each
+// adapter content script picks up the task from chrome.storage when its tab
+// loads, fills the editor, saves a draft, and reports back here so the task
+// can be marked complete.
+//
+// The queue file lives at:
+//   https://raw.githubusercontent.com/codenameone/CodenameOne/master/
+//     scripts/website/syndication-queue.json
+//
+// Format:
+//   {
+//     "tasks": [
+//       {
+//         "id": "<site>:<slug>",
+//         "site": "medium" | "dzone",
+//         "slug": "<post slug>",
+//         "title": "<post title>",
+//         "canonical": "<canonical url on www.codenameone.com>",
+//         "body_html": "<rendered post body>",
+//         "description": "<optional subtitle>",
+//         "cover_image_url": "<optional cover image url>"
+//       }
+//     ]
+//   }
+//
+// Once an adapter completes a task, results land in chrome.storage under
+// `completed_tasks`. The popup UI surfaces them so the user can manually
+// commit the matching state-file update back to the repo. (Round-tripping
+// the result via a GitHub PR from inside the extension would require a
+// committed token; we keep the trust boundary simple by leaving that to
+// the user.)
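+//
+// An illustrative (made-up) task entry:
+//
+//   { "id": "medium:example-post", "site": "medium", "slug": "example-post",
+//     "title": "Example Post", "canonical": "https://www.codenameone.com/blog/example-post/",
+//     "body_html": "<p>…</p>", "description": "", "cover_image_url": null }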
+ +const QUEUE_URL = "https://raw.githubusercontent.com/codenameone/CodenameOne/master/scripts/website/syndication-queue.json"; +const POLL_INTERVAL_MINUTES = 30; +const EDITOR_URLS = { + medium: "https://medium.com/new-story", + dzone: "https://dzone.com/content/article/post.html", +}; + +chrome.runtime.onInstalled.addListener(() => { + chrome.alarms.create("poll", { periodInMinutes: POLL_INTERVAL_MINUTES }); + chrome.storage.local.set({ completed_tasks: [], pending_tasks: [], last_poll: null }); +}); + +chrome.alarms.onAlarm.addListener((alarm) => { + if (alarm.name === "poll") void runPoll(); +}); + +chrome.action.onClicked.addListener(() => { + void runPoll(); +}); + +chrome.runtime.onMessage.addListener((msg, _sender, sendResponse) => { + if (msg && msg.type === "poll-now") { + runPoll().then(() => sendResponse({ ok: true })).catch((err) => sendResponse({ ok: false, error: String(err) })); + return true; // keep the channel open for the async response + } +}); + +async function runPoll() { + await chrome.storage.local.set({ last_poll: new Date().toISOString() }); + let queue; + try { + const resp = await fetch(QUEUE_URL, { cache: "no-store" }); + if (!resp.ok) { + console.warn("[syndicator] queue fetch failed", resp.status); + return; + } + queue = await resp.json(); + } catch (err) { + console.warn("[syndicator] queue fetch error", err); + return; + } + const completed = (await chrome.storage.local.get("completed_tasks")).completed_tasks || []; + const completedIds = new Set(completed.map((c) => c.id)); + const tasks = (queue.tasks || []).filter((t) => !completedIds.has(t.id)); + await chrome.storage.local.set({ pending_tasks: tasks }); + for (const task of tasks) { + await processTask(task); + } +} + +async function processTask(task) { + const editorUrl = EDITOR_URLS[task.site]; + if (!editorUrl) { + console.warn("[syndicator] unknown site", task.site); + return; + } + // Stash the task in storage keyed by site; the content script reads it on load. + await chrome.storage.local.set({ [`task_for_${task.site}`]: task }); + const tab = await chrome.tabs.create({ url: editorUrl, active: false }); + // Wait for completion message from the content script (with a long timeout) + // — the content script signals via chrome.runtime.sendMessage. + await new Promise((resolve) => { + const listener = (msg, sender) => { + if (msg && msg.type === "syndication-complete" && msg.task_id === task.id) { + chrome.runtime.onMessage.removeListener(listener); + chrome.storage.local.get("completed_tasks").then(({ completed_tasks = [] }) => { + completed_tasks.push({ + id: task.id, + site: task.site, + slug: task.slug, + url: msg.url || tab.url, + success: msg.success, + error: msg.error || null, + completed_at: new Date().toISOString(), + }); + chrome.storage.local.set({ completed_tasks }); + }); + // Close the tab once the adapter is done (give it a beat to flush state). 
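+        // The 2s grace period is a heuristic: long enough for the adapter's
+        // final chrome.storage writes to land before the tab disappears.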
+        setTimeout(() => chrome.tabs.remove(tab.id).catch(() => {}), 2000);
+        resolve();
+      }
+    };
+    chrome.runtime.onMessage.addListener(listener);
+    setTimeout(() => {
+      chrome.runtime.onMessage.removeListener(listener);
+      resolve();
+    }, 5 * 60 * 1000); // 5-minute hard timeout per task
+  });
+}
diff --git a/scripts/syndication-extension/manifest.json b/scripts/syndication-extension/manifest.json
new file mode 100644
index 0000000000..1af57c51ca
--- /dev/null
+++ b/scripts/syndication-extension/manifest.json
@@ -0,0 +1,41 @@
+{
+  "manifest_version": 3,
+  "name": "Codename One Syndicator",
+  "version": "0.1.0",
+  "description": "Syndicates Codename One blog posts to sites that block headless automation (Medium, DZone). Runs inside the user's logged-in browser session, so requests carry a real browser fingerprint and bypass Cloudflare bot detection.",
+  "permissions": [
+    "storage",
+    "alarms",
+    "tabs",
+    "scripting"
+  ],
+  "host_permissions": [
+    "https://medium.com/*",
+    "https://dzone.com/*",
+    "https://raw.githubusercontent.com/codenameone/CodenameOne/*"
+  ],
+  "background": {
+    "scripts": ["background.js"]
+  },
+  "browser_specific_settings": {
+    "gecko": {
+      "id": "syndicator@codenameone.com"
+    }
+  },
+  "action": {
+    "default_title": "Codename One Syndicator",
+    "default_popup": "popup.html"
+  },
+  "content_scripts": [
+    {
+      "matches": ["https://medium.com/new-story", "https://medium.com/p/*/edit"],
+      "js": ["adapters/common.js", "adapters/medium.js"],
+      "run_at": "document_idle"
+    },
+    {
+      "matches": ["https://dzone.com/content/article/*"],
+      "js": ["adapters/common.js", "adapters/dzone.js"],
+      "run_at": "document_idle"
+    }
+  ]
+}
diff --git a/scripts/syndication-extension/popup.html b/scripts/syndication-extension/popup.html
new file mode 100644
index 0000000000..62bb31eb10
--- /dev/null
+++ b/scripts/syndication-extension/popup.html
@@ -0,0 +1,30 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>CN1 Syndicator</title>
+  <style>
+    body { font: 13px sans-serif; min-width: 380px; padding: 8px; }
+    h1 { font-size: 15px; }
+    h2 { font-size: 13px; margin: 10px 0 4px; }
+    .muted { color: #888; }
+    pre { background: #f4f4f4; padding: 6px; white-space: pre-wrap; }
+  </style>
+</head>
+<body>
+  <h1>Codename One Syndicator</h1>
+  <div id="status" class="muted"></div>
+  <p>
+    <button id="poll">Poll syndication queue now</button>
+    <button id="clear">Clear completed</button>
+  </p>
+  <h2>Pending</h2>
+  <div id="pending"></div>
+  <h2>Completed</h2>
+  <div id="completed"></div>
+  <h2>State patch (paste into syndication-state.json)</h2>
+  <pre id="state-patch">(none yet)</pre>
+  <script src="popup.js"></script>
+</body>
+</html>
diff --git a/scripts/syndication-extension/popup.js b/scripts/syndication-extension/popup.js
new file mode 100644
index 0000000000..4b47ea7c33
--- /dev/null
+++ b/scripts/syndication-extension/popup.js
@@ -0,0 +1,59 @@
+// Popup UI: list pending + completed syndication tasks, surface a JSON
+// patch the user can drop into scripts/website/syndication-state.json
+// to record the syndication results in the repo.
+
+async function refresh() {
+  const data = await chrome.storage.local.get(["pending_tasks", "completed_tasks", "last_poll"]);
+  const pending = data.pending_tasks || [];
+  const completed = data.completed_tasks || [];
+  const lastPoll = data.last_poll || "never";
+
+  document.getElementById("status").textContent = `Last poll: ${lastPoll}`;
+  document.getElementById("pending").innerHTML =
+    pending.length === 0
+      ? '<div class="muted">none</div>'
+      : pending.map((t) => `<div>${t.site}: ${t.slug}</div>`).join("");
+  document.getElementById("completed").innerHTML =
+    completed.length === 0
+      ? '<div class="muted">none</div>'
+      : completed
+          .map(
+            (c) =>
+              `<div><b>${c.site}</b>: ${c.slug} ` +
+              `${c.success ? "OK" : "FAIL"} ` +
+              `${c.url || "(no url)"}` +
+              `${c.error ? `<br>${c.error}` : ""}` +
+              `<br><small>${c.completed_at}</small></div>
` + ) + .join(""); + + // Build a JSON patch grouped by slug + const patch = { posts: {} }; + for (const c of completed) { + if (!c.success) continue; + if (!patch.posts[c.slug]) patch.posts[c.slug] = {}; + patch.posts[c.slug][c.site] = { + url: c.url, + syndicated_at: c.completed_at, + }; + } + document.getElementById("state-patch").textContent = + completed.filter((c) => c.success).length === 0 ? "(none yet)" : JSON.stringify(patch, null, 2); +} + +document.getElementById("poll").addEventListener("click", async () => { + await chrome.runtime.sendMessage({ type: "poll-now" }).catch(() => {}); + // The background's onClicked handler also runs poll on action click; but + // the popup is its own action. Send a message AND fall back to invoking + // the alarm by triggering the browser action explicitly via chrome.alarms. + setTimeout(refresh, 500); +}); + +document.getElementById("clear").addEventListener("click", async () => { + await chrome.storage.local.set({ completed_tasks: [] }); + refresh(); +}); + +// Make the popup poll immediately when opened +chrome.runtime.sendMessage({ type: "poll-now" }).catch(() => {}); +refresh(); diff --git a/scripts/website/export_storage_state.py b/scripts/website/export_storage_state.py new file mode 100755 index 0000000000..9f0500dce4 --- /dev/null +++ b/scripts/website/export_storage_state.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python3 +"""Export a logged-in browser session for syndication targets that block +password-based automation (Medium has no password login at all; DZone +gates its login form behind invisible reCAPTCHA). + +Two paths: + + --from-firefox-profile read cookies straight from your existing Firefox + profile's cookies.sqlite (no second login) + --browser {chrome,...} launch Playwright with the chosen browser, open + the site's signin page, poll for auth cookies + +Output is a Playwright storageState JSON written to disk and (unless +--no-base64) a base64 blob ready to paste as the {SITE}_STORAGE_STATE +repo secret consumed by syndicate_browser_posts.py. + +Examples: + + python3 scripts/website/export_storage_state.py --site medium --from-firefox-profile + python3 scripts/website/export_storage_state.py --site dzone --browser firefox +""" + +from __future__ import annotations + +import argparse +import base64 +import glob +import json +import shutil +import sqlite3 +import sys +import tempfile +import time +from pathlib import Path + + +DEFAULT_OUTPUT = Path("medium-storage-state.json") +DEFAULT_TIMEOUT_SECONDS = 600 # 10 minutes for the user to complete login + +# Per-target site profile. Each entry knows where to land in a launched browser, +# which cookie domain to filter from a Firefox profile, and how to recognize +# a logged-in session (a function over the captured cookie list). +SITE_PROFILES: dict[str, dict] = { + "medium": { + "signin_url": "https://medium.com/m/signin", + "cookie_host_glob": "%medium.com", + # Medium assigns every visitor a `uid` cookie. Anonymous visitors get a + # value prefixed with `lo_`; a signed-in user gets one without it. + "is_logged_in": lambda cookies: any( + c.get("name") == "uid" and not (c.get("value") or "").startswith("lo_") + for c in cookies + ), + }, + "dzone": { + "signin_url": "https://dzone.com/users/login.html", + "cookie_host_glob": "%dzone.com", + # DZone uses Spring Security's `remember-me` cookie for long-lived auth + # plus a per-session `dz` cookie. Either one signals a logged-in + # session. 
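+        # Precedence note: this matches `remember-me` on its own, or a dz*
+        # cookie that is not the anonymous `dzuuid` (`and` binds tighter than `or`).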
+ "is_logged_in": lambda cookies: any( + c.get("name") == "remember-me" or (c.get("name") or "").startswith("dz") + and (c.get("name") or "") not in ("dzuuid",) # dzuuid is anonymous + for c in cookies + ), + }, +} + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--site", choices=sorted(SITE_PROFILES), default="medium", + help="Which target site to capture a session for (default: medium).") + parser.add_argument("--output", default=None, + help="Path to write the storage state JSON (default: -storage-state.json)") + parser.add_argument("--no-base64", action="store_true", + help="Skip printing the base64 blob (just write the JSON file).") + parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT_SECONDS, + help="Maximum seconds to wait for login completion (default: 600).") + parser.add_argument("--interactive", action="store_true", + help="Wait for Enter on stdin instead of polling for auth cookies.") + parser.add_argument("--browser", default="chrome", choices=["chrome", "chromium", "firefox", "msedge"], + help="Which Playwright browser to launch (default: chrome).") + parser.add_argument("--from-firefox-profile", nargs="?", const="auto", default=None, + help=("Skip launching a browser and instead read medium.com cookies from an " + "existing Firefox profile's cookies.sqlite. Pass a path or omit for auto-detect.")) + return parser.parse_args(argv) + + +def _locate_firefox_profile(explicit: str | None) -> Path: + if explicit and explicit != "auto": + path = Path(explicit).expanduser() + if path.is_file(): + return path + if path.is_dir(): + candidate = path / "cookies.sqlite" + if candidate.is_file(): + return candidate + raise RuntimeError(f"Firefox cookies.sqlite not found at {path}") + # Auto-detect macOS Firefox profile. + base = Path.home() / "Library" / "Application Support" / "Firefox" / "Profiles" + if not base.exists(): + # Linux / other-OS fallbacks. + for guess in (Path.home() / ".mozilla" / "firefox", Path.home() / "snap" / "firefox" / "common" / ".mozilla" / "firefox"): + if guess.exists(): + base = guess + break + if not base.exists(): + raise RuntimeError("Could not locate a Firefox profiles directory.") + candidates = sorted(glob.glob(str(base / "*default*" / "cookies.sqlite"))) or sorted( + glob.glob(str(base / "*" / "cookies.sqlite")) + ) + if not candidates: + raise RuntimeError(f"No cookies.sqlite found under {base}") + # Prefer the most recently modified profile. + return Path(max(candidates, key=lambda p: Path(p).stat().st_mtime)) + + +def _firefox_storage_state(cookies_db: Path, host_glob: str) -> dict: + # Copy to a temp file because Firefox holds a write lock on the live DB. + with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as tmp: + tmp_path = Path(tmp.name) + shutil.copy2(cookies_db, tmp_path) + try: + conn = sqlite3.connect(f"file:{tmp_path}?mode=ro", uri=True) + cur = conn.execute( + "SELECT name, value, host, path, expiry, isSecure, isHttpOnly, sameSite " + "FROM moz_cookies WHERE host LIKE ?", + (host_glob,), + ) + rows = cur.fetchall() + conn.close() + finally: + tmp_path.unlink(missing_ok=True) + samesite_map = {0: "None", 1: "Lax", 2: "Strict"} + cookies = [] + for name, value, host, path, expiry, is_secure, is_http_only, same_site in rows: + cookies.append({ + "name": name, + "value": value, + "domain": host if host.startswith(".") else "." 
+ host, + "path": path or "/", + "expires": _normalize_expiry(expiry), + "httpOnly": bool(is_http_only), + "secure": bool(is_secure), + "sameSite": samesite_map.get(int(same_site or 0), "None"), + }) + return {"cookies": cookies, "origins": []} + + +def _normalize_expiry(raw: float | int | None) -> float: + """Coerce a Firefox cookies.sqlite expiry into a Playwright-acceptable value. + + Playwright wants seconds-since-epoch (positive number) or -1 for session. + Firefox stores `expiry` in seconds in older code but in milliseconds in + newer entries. Anything past ~year 5138 must be milliseconds — divide. + """ + if not raw: + return -1.0 + value = float(raw) + if value > 1e11: # > ~year 5138 in seconds; treat as milliseconds. + value = value / 1000.0 + return value + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + profile = SITE_PROFILES[args.site] + output_path = Path(args.output or f"{args.site}-storage-state.json").resolve() + secret_name = f"{args.site.upper()}_STORAGE_STATE" + + if args.from_firefox_profile is not None: + try: + cookies_db = _locate_firefox_profile(args.from_firefox_profile) + except RuntimeError as err: + print(f"Error: {err}", file=sys.stderr) + return 1 + print(f"Reading {args.site} cookies from Firefox profile: {cookies_db}") + state = _firefox_storage_state(cookies_db, profile["cookie_host_glob"]) + if not profile["is_logged_in"](state["cookies"]): + print(f"Error: this Firefox profile does not appear to be logged in to {args.site}.", + file=sys.stderr) + return 1 + output_path.write_text(json.dumps(state), encoding="utf-8") + print(f"Wrote storage state: {output_path}") + print(f" cookies captured: {len(state['cookies'])}") + if not args.no_base64: + encoded = base64.b64encode(output_path.read_bytes()).decode("ascii") + print() + print(f"Paste the following as the {secret_name} repository secret:") + print("-" * 72) + print(encoded) + print("-" * 72) + return 0 + + try: + from playwright.sync_api import sync_playwright + except ImportError: + print("Playwright is not installed. In a venv, run: pip install playwright && playwright install chromium", + file=sys.stderr) + return 1 + + with sync_playwright() as pw: + launch_kwargs: dict = {"headless": False} + # The args namespace renamed channel to browser to allow Firefox. + browser_choice = args.browser + if browser_choice == "firefox": + try: + browser = pw.firefox.launch(headless=False) + except Exception as err: # noqa: BLE001 + print(f"Could not launch Playwright Firefox ({err}). 
" + "Run `playwright install firefox` and retry.", file=sys.stderr) + return 1 + else: + if browser_choice and browser_choice != "chromium": + launch_kwargs["channel"] = browser_choice + try: + browser = pw.chromium.launch(**launch_kwargs) + except Exception as err: # noqa: BLE001 — channel may not be installed + print(f"Could not launch with browser='{browser_choice}' ({err}); falling back to bundled Chromium.", + file=sys.stderr) + browser = pw.chromium.launch(headless=False) + + context = browser.new_context( + viewport={"width": 1280, "height": 900}, + user_agent=( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" + ), + ) + page = context.new_page() + page.goto(profile["signin_url"]) + + print() + print("=" * 72) + print(f"A browser window has opened on {args.site}'s sign-in page.") + print("Log in (Google / email / whatever you normally use).") + if args.interactive: + print("When you can see your Medium home or profile, return here and press Enter.") + else: + print(f"The script will detect login automatically (waiting up to {args.timeout}s).") + print("=" * 72) + sys.stdout.flush() + + if args.interactive: + try: + input("Press Enter once you are logged in… ") + except (KeyboardInterrupt, EOFError): + print("Aborted.", file=sys.stderr) + browser.close() + return 1 + else: + deadline = time.time() + args.timeout + detected = False + while time.time() < deadline: + if profile["is_logged_in"](context.cookies(profile["signin_url"])): + detected = True + break + time.sleep(3) + if not detected: + print(f"Timed out waiting for {args.site} login — auth cookies not detected.", + file=sys.stderr) + browser.close() + return 1 + print("Logged-in cookies detected — capturing session state…") + # Give the site a couple seconds to finish setting localStorage. + time.sleep(3) + + state = context.storage_state() + output_path.write_text(json.dumps(state), encoding="utf-8") + browser.close() + + print() + print(f"Wrote storage state: {output_path}") + print(f" cookies captured: {len(state.get('cookies', []))}") + print(f" origins with localStorage: {len(state.get('origins', []))}") + + if args.no_base64: + return 0 + + encoded = base64.b64encode(output_path.read_bytes()).decode("ascii") + print() + print(f"Paste the following as the {secret_name} repository secret:") + print("-" * 72) + print(encoded) + print("-" * 72) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/scripts/website/queue_browser_syndication.py b/scripts/website/queue_browser_syndication.py new file mode 100644 index 0000000000..6c15018c59 --- /dev/null +++ b/scripts/website/queue_browser_syndication.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +"""Append browser-only syndication tasks to syndication-queue.json. + +This script is the bridge between the daily CI cron (which knows which +posts are eligible for syndication) and the Codename One Syndicator +Firefox extension (which runs inside the user's logged-in Firefox to +drive Medium/DZone editors past Cloudflare). + +Daily flow: + + 1. CI runs the API syndicator (foojay, dev.to, hashnode) directly. + 2. CI runs *this* script for `medium,dzone` (or whatever browser + platforms are configured) — it appends a task entry to + syndication-queue.json for every eligible post that does not + already have an entry in syndication-state.json for that platform. + 3. The committed queue file is what the extension polls. 
When the
+       user's Firefox is online, the extension processes pending tasks.
+    4. The extension's popup surfaces a JSON patch the user can paste into
+       syndication-state.json (or a small local script can ingest the
+       extension's results).
+
+Tasks are deduplicated by id (`<platform>:<slug>`).
+"""
+
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import json
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+from syndicate_blog_posts import (  # noqa: E402
+    BLOG_DIR,
+    ELIGIBILITY_FLOOR,
+    MIN_AGE_DAYS,
+    Post,
+    STATE_FILE,
+    State,
+    discover_posts,
+    render_syndicated_body,
+)
+
+QUEUE_FILE = Path(__file__).resolve().parent / "syndication-queue.json"
+DEFAULT_PLATFORMS = "medium,dzone"
+
+
+def _markdown_to_html(text: str) -> str:
+    try:
+        import markdown as _md
+    except ImportError:
+        from html import escape as _esc
+        return f"<pre>{_esc(text)}</pre>"
+    return _md.markdown(text, extensions=["extra", "fenced_code", "sane_lists"], output_format="html5")
+
+
+def _eligible_posts(today: dt.date, floor: dt.date, min_age_days: int, blog_dir: Path) -> list[Post]:
+    posts = discover_posts(blog_dir)
+    cutoff = today - dt.timedelta(days=min_age_days)
+    return [p for p in posts if p.date > floor and p.date <= cutoff]
+
+
+def _build_task(post: Post, platform: str) -> dict:
+    body_html = _markdown_to_html(render_syndicated_body(post))
+    return {
+        "id": f"{platform}:{post.slug}",
+        "site": platform,
+        "slug": post.slug,
+        "title": post.title,
+        "canonical": post.canonical_url,
+        "description": str(post.front_matter.get("description") or "").strip(),
+        "cover_image_url": post.cover_image,
+        "body_html": body_html,
+        "queued_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"),
+    }
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--platforms", default=DEFAULT_PLATFORMS,
+                        help=f"Comma-separated platforms (default: {DEFAULT_PLATFORMS}).")
+    parser.add_argument("--today", default=None, help="Override today's date (YYYY-MM-DD).")
+    parser.add_argument("--floor", default=ELIGIBILITY_FLOOR.isoformat())
+    parser.add_argument("--min-age-days", type=int, default=MIN_AGE_DAYS)
+    parser.add_argument("--blog-dir", default=str(BLOG_DIR))
+    parser.add_argument("--state-file", default=str(STATE_FILE))
+    parser.add_argument("--queue-file", default=str(QUEUE_FILE))
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Print what would be queued; do not modify the file.")
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str]) -> int:
+    args = parse_args(argv)
+    today = dt.date.fromisoformat(args.today) if args.today else dt.date.today()
+    floor = dt.date.fromisoformat(args.floor)
+    platforms = [p.strip() for p in args.platforms.split(",") if p.strip()]
+    state = State.load(Path(args.state_file))
+    posts = _eligible_posts(today, floor, args.min_age_days, Path(args.blog_dir))
+
+    queue_path = Path(args.queue_file)
+    if queue_path.exists():
+        queue = json.loads(queue_path.read_text(encoding="utf-8"))
+    else:
+        queue = {"tasks": []}
+    existing_ids = {t.get("id") for t in queue.get("tasks", [])}
+
+    new_tasks: list[dict] = []
+    for post in posts:
+        for platform in platforms:
+            task_id = f"{platform}:{post.slug}"
+            if task_id in existing_ids:
+                continue
+            if state.is_syndicated(post.slug, platform):
+                continue
+            new_tasks.append(_build_task(post, platform))
+
+    if not new_tasks:
+        print("No new browser-syndication tasks to queue.")
+        return 0
+
+    print(f"Queueing {len(new_tasks)} new task(s):")
+    for t in new_tasks:
+        print(f"  + {t['id']}")
+
+    if args.dry_run:
+        return 0
+
+    queue.setdefault("tasks", []).extend(new_tasks)
+    queue_path.write_text(json.dumps(queue, indent=2) + "\n", encoding="utf-8")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/website/syndicate_blog_posts.py b/scripts/website/syndicate_blog_posts.py
new file mode 100755
index 0000000000..01f6303c7e
--- /dev/null
+++ b/scripts/website/syndicate_blog_posts.py
@@ -0,0 +1,515 @@
+#!/usr/bin/env python3
+"""Syndicate Codename One Hugo blog posts to dev.to and Hashnode.
+
+Selects the oldest blog post under ``docs/website/content/blog`` that:
+
+* has a ``date`` strictly after the eligibility floor (default: 2026-04-30),
+* is at least ``--min-age-days`` old (default: 7),
+*
+ +For each unsyndicated platform on the chosen post the script POSTs the +content with ``canonical_url`` pointing back at the original on +``www.codenameone.com`` and records the resulting URL / id in +``scripts/website/syndication-state.json``. + +Designed to run from a daily GitHub Action with only the Python standard +library available. +""" + +from __future__ import annotations + +import argparse +import datetime as dt +import json +import os +import re +import sys +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + + +REPO_ROOT = Path(__file__).resolve().parents[2] +BLOG_DIR = REPO_ROOT / "docs" / "website" / "content" / "blog" +STATE_FILE = REPO_ROOT / "scripts" / "website" / "syndication-state.json" +SITE_BASE_URL = "https://www.codenameone.com" + +ELIGIBILITY_FLOOR = dt.date(2026, 4, 30) # posts must be strictly newer than this +MIN_AGE_DAYS = 7 + +CN1_BLURB = ( + '' +) + +# Hugo-only tail blocks that should not be syndicated. +_HUGO_FOOTER_RE = re.compile(r"\n\s*---\s*\n+##\s*Discussion\b.*\Z", re.DOTALL | re.IGNORECASE) +_HUGO_SHORTCODE_RE = re.compile(r"\{\{<[^>]*>\}\}|\{\{%[^%]*%\}\}") + +DEVTO_TAGS = ["java", "mobile", "android", "ios"] +HASHNODE_TAGS = [ + {"slug": "java", "name": "Java"}, + {"slug": "mobile", "name": "Mobile"}, + {"slug": "android", "name": "Android"}, + {"slug": "ios", "name": "iOS"}, +] + +DEFAULT_PLATFORMS = "devto,hashnode" + + +@dataclass +class Post: + path: Path + slug: str + title: str + date: dt.date + front_matter: dict[str, Any] + body: str + + @property + def canonical_url(self) -> str: + url_field = self.front_matter.get("url") + if isinstance(url_field, str) and url_field.startswith("/"): + return f"{SITE_BASE_URL}{url_field}" + return f"{SITE_BASE_URL}/blog/{self.slug}/" + + @property + def cover_image(self) -> str | None: + match = re.search(r"!\[[^\]]*\]\((/blog/[^)\s]+)\)", self.body) + if match: + return f"{SITE_BASE_URL}{match.group(1)}" + return None + + +@dataclass +class State: + raw: dict[str, Any] = field(default_factory=dict) + + @classmethod + def load(cls, path: Path) -> "State": + if not path.exists(): + return cls(raw={"posts": {}}) + with path.open("r", encoding="utf-8") as handle: + data = json.load(handle) + if "posts" not in data or not isinstance(data["posts"], dict): + data["posts"] = {} + return cls(raw=data) + + def save(self, path: Path) -> None: + ordered = {key: self.raw[key] for key in ("_comment", "posts") if key in self.raw} + for key, value in self.raw.items(): + if key not in ordered: + ordered[key] = value + with path.open("w", encoding="utf-8") as handle: + json.dump(ordered, handle, indent=2, sort_keys=False) + handle.write("\n") + + def is_syndicated(self, slug: str, platform: str) -> bool: + post = self.raw["posts"].get(slug) + if not post: + return False + entry = post.get(platform) + return bool(entry and entry.get("url")) + + def record(self, slug: str, platform: str, payload: dict[str, Any]) -> None: + post = self.raw["posts"].setdefault(slug, {}) + post[platform] = payload + + +def parse_front_matter(text: str) -> tuple[dict[str, Any], str]: + """Parse the small subset of YAML front matter the blog uses. + + The site's posts use simple ``key: value`` pairs (no nesting, no lists), + so a hand-rolled parser keeps this script dependency-free. 
+ """ + if not text.startswith("---\n"): + raise ValueError("missing front matter") + end = text.find("\n---\n", 4) + if end == -1: + raise ValueError("unterminated front matter") + block = text[4:end] + body = text[end + len("\n---\n") :] + + fm: dict[str, Any] = {} + current_key: str | None = None + current_lines: list[str] | None = None + + for raw_line in block.splitlines(): + if current_key is not None and (raw_line.startswith(" ") or raw_line.startswith("\t") or raw_line == ""): + current_lines.append(raw_line) + continue + if current_lines is not None and current_key is not None: + fm[current_key] = _coerce_scalar("\n".join(current_lines).strip()) + current_key = None + current_lines = None + + match = re.match(r"^([A-Za-z0-9_]+):\s*(.*)$", raw_line) + if not match: + continue + key, value = match.group(1), match.group(2) + if value == "": + current_key = key + current_lines = [] + else: + fm[key] = _coerce_scalar(value) + + if current_lines is not None and current_key is not None: + fm[current_key] = _coerce_scalar("\n".join(current_lines).strip()) + + return fm, body + + +def _coerce_scalar(value: str) -> Any: + stripped = value.strip() + if len(stripped) >= 2 and stripped[0] == stripped[-1] and stripped[0] in ("'", '"'): + inner = stripped[1:-1] + if stripped[0] == "'": + inner = inner.replace("''", "'") + return inner + if stripped.lower() in ("true", "false"): + return stripped.lower() == "true" + return stripped + + +def parse_post(path: Path) -> Post | None: + text = path.read_text(encoding="utf-8") + try: + fm, body = parse_front_matter(text) + except ValueError: + return None + date_value = fm.get("date") + if not isinstance(date_value, str): + return None + try: + date = dt.date.fromisoformat(date_value[:10]) + except ValueError: + return None + slug = fm.get("slug") or path.stem + title = fm.get("title") or slug + return Post(path=path, slug=slug, title=str(title), date=date, front_matter=fm, body=body) + + +def discover_posts(blog_dir: Path) -> list[Post]: + posts: list[Post] = [] + for path in sorted(blog_dir.glob("*.md")): + if path.name.startswith("_"): + continue + post = parse_post(path) + if post is not None: + posts.append(post) + posts.sort(key=lambda p: p.date) + return posts + + +def select_candidate( + posts: list[Post], + state: State, + platforms: list[str], + today: dt.date, + floor: dt.date, + min_age_days: int, +) -> Post | None: + cutoff = today - dt.timedelta(days=min_age_days) + for post in posts: + if post.date <= floor: + continue + if post.date > cutoff: + continue + if all(state.is_syndicated(post.slug, p) for p in platforms): + continue + return post + return None + + +_RELATIVE_LINK_RE = re.compile(r"(\]\()(/[^)\s]+)(\))") +_RELATIVE_IMG_RE = re.compile(r'(]*src=["\'])(/[^"\']+)(["\'])', re.IGNORECASE) + + +def absolutize_links(body: str) -> str: + body = _RELATIVE_LINK_RE.sub(lambda m: f"{m.group(1)}{SITE_BASE_URL}{m.group(2)}{m.group(3)}", body) + body = _RELATIVE_IMG_RE.sub(lambda m: f"{m.group(1)}{SITE_BASE_URL}{m.group(2)}{m.group(3)}", body) + return body + + +def insert_blurb(body: str, blurb: str) -> str: + """Insert ``blurb`` after the first non-image paragraph (i.e. 
after the fold).""" + lines = body.split("\n") + n = len(lines) + i = 0 + # skip leading blank lines + while i < n and lines[i].strip() == "": + i += 1 + # skip a leading header image (a paragraph that is just a markdown image) + if i < n and re.match(r"^!\[[^\]]*\]\([^)]+\)\s*$", lines[i].strip()): + i += 1 + while i < n and lines[i].strip() == "": + i += 1 + # skip the first paragraph of body text + while i < n and lines[i].strip() != "": + i += 1 + # i now points at the blank line (or EOF) following the first text paragraph + insertion = ["", blurb, ""] + return "\n".join(lines[:i] + insertion + lines[i:]) + + +def render_syndicated_body(post: Post) -> str: + body = post.body.strip("\n") + body = _HUGO_FOOTER_RE.sub("", body) + body = _HUGO_SHORTCODE_RE.sub("", body).rstrip() + body = absolutize_links(body) + body = insert_blurb(body, CN1_BLURB) + return body + + +USER_AGENT = "CodenameOneBlogSyndicator/1.0 (+https://github.com/codenameone/CodenameOne)" + + +def http_post_json(url: str, headers: dict[str, str], payload: dict[str, Any]) -> dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + request = urllib.request.Request(url, data=data, method="POST") + request.add_header("Content-Type", "application/json") + request.add_header("User-Agent", USER_AGENT) + request.add_header("Accept", "application/json") + for key, value in headers.items(): + request.add_header(key, value) + try: + with urllib.request.urlopen(request, timeout=60) as response: + body = response.read().decode("utf-8") + except urllib.error.HTTPError as err: + detail = err.read().decode("utf-8", errors="replace") + raise RuntimeError(f"{url} returned HTTP {err.code}: {detail}") from err + if not body: + return {} + return json.loads(body) + + +def publish_to_devto(post: Post, body_markdown: str, api_key: str, draft: bool = False) -> dict[str, Any]: + payload: dict[str, Any] = { + "article": { + "title": post.title, + "body_markdown": body_markdown, + "published": not draft, + "canonical_url": post.canonical_url, + "tags": DEVTO_TAGS, + "description": str(post.front_matter.get("description") or "")[:250] or None, + } + } + cover = post.cover_image + if cover: + payload["article"]["main_image"] = cover + payload["article"] = {k: v for k, v in payload["article"].items() if v is not None} + + response = http_post_json( + "https://dev.to/api/articles", + headers={"api-key": api_key, "Accept": "application/vnd.forem.api-v1+json"}, + payload=payload, + ) + article_id = response.get("id") + # The URL field on dev.to returns the public canonical URL of the article, + # but for unpublished drafts that URL 404s for anyone who is not the author. + # In draft mode point users at the dashboard, where the draft is editable. + if draft and article_id: + url = f"https://dev.to/dashboard/{article_id}/edit" + else: + url = response.get("url") or response.get("canonical_url") + return { + "id": article_id, + "url": url, + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +def publish_to_hashnode(post: Post, body_markdown: str, token: str, publication_id: str, + draft: bool = False) -> dict[str, Any]: + if draft: + mutation = """ + mutation CreateDraft($input: CreateDraftInput!) { + createDraft(input: $input) { + draft { id slug } + } + } + """.strip() + else: + mutation = """ + mutation PublishPost($input: PublishPostInput!) 
{ + publishPost(input: $input) { + post { id slug url } + } + } + """.strip() + + input_obj: dict[str, Any] = { + "title": post.title, + "contentMarkdown": body_markdown, + "publicationId": publication_id, + "tags": HASHNODE_TAGS, + "originalArticleURL": post.canonical_url, + } + cover = post.cover_image + if cover: + input_obj["coverImageOptions"] = {"coverImageURL": cover} + subtitle = str(post.front_matter.get("description") or "").strip() + if subtitle: + input_obj["subtitle"] = subtitle[:250] + + response = http_post_json( + "https://gql.hashnode.com", + headers={"Authorization": token}, + payload={"query": mutation, "variables": {"input": input_obj}}, + ) + if response.get("errors"): + raise RuntimeError(f"hashnode GraphQL errors: {response['errors']}") + data = response.get("data") or {} + if draft: + node = data.get("createDraft", {}).get("draft", {}) + url = f"https://hashnode.com/draft/{node.get('id')}" if node.get("id") else None + else: + node = data.get("publishPost", {}).get("post", {}) + url = node.get("url") + return { + "id": node.get("id"), + "url": url, + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--dry-run", action="store_true", help="Do not call any APIs; print what would happen.") + parser.add_argument("--draft-mode", action="store_true", + help="Create as draft (dev.to: published=false; Hashnode: createDraft) instead of publishing. Useful for verifying formatting without going live.") + parser.add_argument( + "--platforms", + default=DEFAULT_PLATFORMS, + help=f"Comma-separated subset of platforms to consider (default: {DEFAULT_PLATFORMS}).", + ) + parser.add_argument( + "--today", + default=None, + help="Override today's date (YYYY-MM-DD). Useful for testing.", + ) + parser.add_argument( + "--floor", + default=ELIGIBILITY_FLOOR.isoformat(), + help=f"Posts must be dated strictly after this date (default: {ELIGIBILITY_FLOOR.isoformat()}).", + ) + parser.add_argument( + "--min-age-days", + type=int, + default=MIN_AGE_DAYS, + help=f"Minimum post age in days before syndicating (default: {MIN_AGE_DAYS}).", + ) + parser.add_argument( + "--blog-dir", + default=str(BLOG_DIR), + help="Directory containing Hugo blog posts.", + ) + parser.add_argument( + "--state-file", + default=str(STATE_FILE), + help="Path to syndication state JSON.", + ) + return parser.parse_args(argv) + + +def is_platform_configured(platform: str) -> bool: + if platform == "devto": + return bool(os.environ.get("DEVTO_API_KEY")) + if platform == "hashnode": + return bool(os.environ.get("HASHNODE_TOKEN") and os.environ.get("HASHNODE_PUBLICATION_ID")) + return False + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + today = dt.date.fromisoformat(args.today) if args.today else dt.date.today() + floor = dt.date.fromisoformat(args.floor) + requested_platforms = [p.strip() for p in args.platforms.split(",") if p.strip()] + blog_dir = Path(args.blog_dir) + state_file = Path(args.state_file) + + if args.dry_run: + platforms = requested_platforms + else: + platforms = [] + for platform in requested_platforms: + if is_platform_configured(platform): + platforms.append(platform) + else: + # Skipping an unconfigured platform here (instead of failing) keeps + # the candidate selector from getting stuck on a post that can never + # be fully syndicated. 
Once the missing creds appear, the next run + # picks up where this one left off. + print(f"[{platform}] credentials not configured; skipping platform.") + + if not platforms: + print("No platforms are configured; nothing to do.") + return 0 + + posts = discover_posts(blog_dir) + state = State.load(state_file) + candidate = select_candidate(posts, state, platforms, today, floor, args.min_age_days) + if candidate is None: + print("No syndication candidate found today.") + return 0 + + print(f"Selected post: {candidate.slug} (date={candidate.date.isoformat()})") + body_markdown = render_syndicated_body(candidate) + + any_change = False + failures: list[str] = [] + + for platform in platforms: + if state.is_syndicated(candidate.slug, platform): + print(f" [{platform}] already syndicated; skipping.") + continue + if args.dry_run: + print(f" [{platform}] dry run — would publish {len(body_markdown)} chars, canonical {candidate.canonical_url}") + continue + try: + if platform == "devto": + result = publish_to_devto( + candidate, body_markdown, os.environ["DEVTO_API_KEY"], + draft=args.draft_mode, + ) + elif platform == "hashnode": + result = publish_to_hashnode( + candidate, + body_markdown, + os.environ["HASHNODE_TOKEN"], + os.environ["HASHNODE_PUBLICATION_ID"], + draft=args.draft_mode, + ) + else: + raise RuntimeError(f"unknown platform: {platform}") + except Exception as err: # noqa: BLE001 — surface any failure as per-platform + print(f" [{platform}] FAILED: {err}", file=sys.stderr) + failures.append(platform) + continue + + if not result.get("url"): + print(f" [{platform}] response missing URL: {result}", file=sys.stderr) + failures.append(platform) + continue + + state.record(candidate.slug, platform, result) + any_change = True + print(f" [{platform}] published: {result['url']}") + + if any_change: + state.save(state_file) + print(f"Updated state file: {state_file}") + + if failures: + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/scripts/website/syndicate_browser_posts.py b/scripts/website/syndicate_browser_posts.py new file mode 100755 index 0000000000..fa39757d30 --- /dev/null +++ b/scripts/website/syndicate_browser_posts.py @@ -0,0 +1,574 @@ +#!/usr/bin/env python3 +"""Syndicate Codename One Hugo blog posts to sites that have no usable API. + +Counterpart to ``syndicate_blog_posts.py``: instead of POSTing to a REST/ +GraphQL endpoint, this script drives a real (headless) browser via Playwright +and submits the post through the site's normal authoring UI as a draft for +editorial review. Shares ``Post`` discovery, body rendering, and the +``syndication-state.json`` state file with the API-based script. + +Adapters (one class per target site) live at the bottom of this file. Each +adapter exposes a ``login()`` and a ``submit_draft()`` step. Selectors are +kept as constants at the top of each adapter so they are easy to update when +the site changes its UI — which it will, so plan on it. 
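+
+A new adapter needs a ``name`` attribute, an ``is_configured()`` static
+method, and ``login(page)`` / ``submit_draft(page, ctx)`` methods;
+``FoojayAdapter`` below is the reference implementation.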
+ +Usage: + + # First-time setup, watch the browser, take screenshots of the editor: + python3 scripts/website/syndicate_browser_posts.py \ + --platforms foojay --validate-only --headed --today 2026-05-08 + + # Real syndication (headless, daily-cron style): + python3 scripts/website/syndicate_browser_posts.py --platforms foojay + +Required env vars per platform (script auto-skips a platform when its creds +are missing, just like the API script): + + foojay : FOOJAY_USER, FOOJAY_PASSWORD + +DZone and Medium are NOT driven from this Playwright script — both sit +behind aggressive Cloudflare bot detection that cannot be bypassed +reliably from headless automation. They are queued to the Codename One +Syndicator Firefox extension instead, which runs inside the user's +already-trusted browser session. See scripts/syndication-extension/. + +HackerNoon was previously supported here but removed: HackerNoon +charges business sites for canonical URL support, which makes it +unsuitable for syndication where the canonical link back to the +original is the whole point. +""" + +from __future__ import annotations + +import argparse +import base64 +import datetime as dt +import json +import os +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable + +# Reuse the API-based script's discovery, body rendering, and state machinery. +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from syndicate_blog_posts import ( # noqa: E402 (intentional path injection) + BLOG_DIR, + ELIGIBILITY_FLOOR, + MIN_AGE_DAYS, + Post, + STATE_FILE, + State, + discover_posts, + render_syndicated_body, + select_candidate, +) + + +SCREENSHOT_DIR = Path(__file__).resolve().parents[2] / "docs" / "website" / "reports" / "syndication-screenshots" +# DZone and Medium are no longer driven from this Playwright script — both +# are gated by Cloudflare bot detection that headless browsers cannot pass +# reliably. Their syndication is queued to the Codename One Syndicator +# Firefox extension via scripts/website/queue_browser_syndication.py. +DEFAULT_PLATFORMS = "foojay" + +_UA_STR = ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" +) + + +@dataclass +class AdapterContext: + post: Post + body_markdown: str + headed: bool + validate_only: bool + + +# --------------------------------------------------------------------------- # +# Adapters # +# --------------------------------------------------------------------------- # + + +class AdapterError(RuntimeError): + """Raised when an adapter cannot complete its flow.""" + + +def _find_first(page, selectors: list[str], *, timeout: int = 15000): + """Try each selector in turn; return the first that becomes visible. + + Adapters list multiple plausible selectors per field so a small UI tweak + on the target site does not break the run. The first match wins. + """ + last_error: Exception | None = None + for selector in selectors: + try: + handle = page.wait_for_selector(selector, timeout=timeout, state="visible") + if handle: + return handle + except Exception as err: # noqa: BLE001 — Playwright TimeoutError, etc. 
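+            # Typically Playwright's TimeoutError: this selector never became
+            # visible, so remember it and move on to the next candidate.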
+ last_error = err + continue + raise AdapterError(f"none of the selectors matched: {selectors}: {last_error}") + + +def _trim_for_meta_description(text: str, limit: int = 140) -> str: + """Trim a description to Yoast's preferred meta-description length, on a word boundary.""" + text = (text or "").strip() + if len(text) <= limit: + return text + truncated = text[:limit].rsplit(" ", 1)[0].rstrip(",.;:") + return truncated + "…" + + +def _load_base64_storage_state(env_var: str) -> Path: + """Decode a base64-encoded storage_state JSON from an env var to a temp file.""" + encoded = os.environ[env_var] + decoded = base64.b64decode(encoded) + path = Path(f"/tmp/{env_var.lower()}.json") + path.write_bytes(decoded) + return path + + +def _save_screenshot(page, slug: str, label: str) -> Path: + SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True) + stamp = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ") + path = SCREENSHOT_DIR / f"{slug}-{label}-{stamp}.png" + try: + page.screenshot(path=str(path), full_page=True) + except Exception: # noqa: BLE001 — never let a screenshot failure mask the real error + return path + return path + + +class FoojayAdapter: + """foojay.io — Playwright login + REST API draft creation. + + Pure UI submission to foojay does not work reliably: Cloudflare in front + of foojay challenges form POSTs to /wp-admin/post.php and drops the + form payload during the challenge, so the draft is never created. The + REST API is not subject to the same challenge, but Wordfence has + Application Passwords disabled, so token auth is also out. + + The working hybrid: drive wp-login.php with Playwright to obtain a real + user session (cookies), pull the WP REST nonce from /wp-admin/, then + POST the draft through /wp-json/wp/v2/posts with cookie + X-WP-Nonce + auth. Behaves "as a website user" end-to-end while sidestepping both + the app-password block and the Cloudflare POST challenge. + """ + + name = "foojay" + LOGIN_URL = "https://foojay.io/wp-login.php" + BASE_URL = "https://foojay.io" + REST_POSTS_ENDPOINT = "https://foojay.io/wp-json/wp/v2/posts" + REST_TAGS_ENDPOINT = "https://foojay.io/wp-json/wp/v2/tags" + REST_MEDIA_ENDPOINT = "https://foojay.io/wp-json/wp/v2/media" + XMLRPC_ENDPOINT = "https://foojay.io/xmlrpc.php" + + # Pre-resolved category and tag IDs (from /wp-json/wp/v2/categories?search=java + # and /wp-json/wp/v2/tags?slug=codenameone). The tag is created lazily on + # first use if it does not yet exist. + JAVA_CATEGORY_ID = 1722 + CODENAMEONE_TAG_SLUG = "codenameone" + CODENAMEONE_TAG_NAME = "Codename One" + + USER_SELECTORS = ["#user_login"] + PASSWORD_SELECTORS = ["#user_pass"] + SUBMIT_SELECTORS = ["#wp-submit"] + + @staticmethod + def is_configured() -> bool: + return bool(os.environ.get("FOOJAY_USER") and os.environ.get("FOOJAY_PASSWORD")) + + def login(self, page) -> None: + page.goto(self.LOGIN_URL, wait_until="domcontentloaded") + _find_first(page, self.USER_SELECTORS).fill(os.environ["FOOJAY_USER"]) + _find_first(page, self.PASSWORD_SELECTORS).fill(os.environ["FOOJAY_PASSWORD"]) + _find_first(page, self.SUBMIT_SELECTORS).click() + try: + page.wait_for_url("**/wp-admin/**", timeout=90000) + except Exception: # noqa: BLE001 + page.wait_for_selector("#wpadminbar", timeout=30000) + + def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: + # Land on wp-admin so wpApiSettings (which carries the nonce) is in scope. 
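+        # (WordPress injects `window.wpApiSettings` (REST root, nonce, etc.)
+        # into wp-admin pages; that nonce is what authorizes cookie-based REST
+        # writes, and it is what the evaluate() below extracts.)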
+ page.goto("https://foojay.io/wp-admin/", wait_until="domcontentloaded", timeout=60000) + nonce = page.evaluate( + "() => (window.wpApiSettings && window.wpApiSettings.nonce) || null" + ) + if not nonce: + raise AdapterError("could not extract wpApiSettings.nonce from /wp-admin/") + + if ctx.validate_only: + shot = _save_screenshot(page, ctx.post.slug, "foojay-editor") + return {"validated": True, "screenshot": str(shot), "nonce_acquired": True} + + cookies = page.context.cookies("https://foojay.io/") + cookie_header = "; ".join(f"{c['name']}={c['value']}" for c in cookies) + + # Resolve / create the codenameone tag. + tag_id = self._ensure_tag(cookie_header, nonce) + # Upload the cover image into the WP media library and use the + # returned media ID as the post's featured image. + featured_media_id: int | None = None + if ctx.post.cover_image: + try: + featured_media_id = self._upload_featured_media( + cookie_header, nonce, ctx.post.cover_image, ctx.post.title + ) + except Exception as err: # noqa: BLE001 — featured image is best-effort + print(f" [foojay] featured image upload failed (non-fatal): {err}", file=sys.stderr) + + # Yoast canonical (_yoast_wpseo_canonical) is not registered for REST + # writes on this Yoast install. We send it in `meta` regardless (it's + # silently ignored if rejected, accepted if registered) AND surface it + # as a hidden HTML comment at the top of the body so the editor can + # spot the original URL when filling Yoast's metabox. + excerpt = str(ctx.post.front_matter.get("description") or "").strip() + canonical_prefix = f"\n\n" + + payload: dict[str, Any] = { + "title": ctx.post.title, + "content": canonical_prefix + ctx.body_markdown, + "status": "draft", + "categories": [self.JAVA_CATEGORY_ID], + "tags": [tag_id] if tag_id else [], + "meta": { + "_yoast_wpseo_canonical": ctx.post.canonical_url, + "_yoast_wpseo_title": ctx.post.title, + "_yoast_wpseo_metadesc": excerpt[:155] if excerpt else "", + }, + } + if featured_media_id: + payload["featured_media"] = featured_media_id + if excerpt: + payload["excerpt"] = excerpt[:500] + + data = self._rest_post(self.REST_POSTS_ENDPOINT, cookie_header, nonce, payload) + post_id = data.get("id") + if not post_id: + raise AdapterError(f"REST response missing post id: {data}") + + # Yoast meta (canonical / SEO title / metadesc) is not REST-writable on + # foojay's Yoast install. wp-admin form-submit is blocked by Cloudflare. + # XML-RPC's wp.editPost with custom_fields bypasses both restrictions + # and successfully writes the underscore-prefixed meta keys. 
+ yoast_set = False + try: + self._set_yoast_meta_via_xmlrpc( + post_id=post_id, + canonical=ctx.post.canonical_url, + seo_title=ctx.post.title, + metadesc=_trim_for_meta_description(excerpt), + ) + yoast_set = True + except Exception as err: # noqa: BLE001 — Yoast meta is best-effort + print(f" [foojay] XML-RPC Yoast meta write failed (non-fatal): {err}", file=sys.stderr) + + return { + "id": post_id, + "url": f"https://foojay.io/wp-admin/post.php?post={post_id}&action=edit", + "preview_url": data.get("link"), + "featured_media_id": featured_media_id, + "yoast_meta_set": yoast_set, + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + # ----- helpers ----- + + def _ensure_tag(self, cookie_header: str, nonce: str) -> int | None: + """Return the WP tag id for `codenameone`, creating it if missing.""" + import urllib.parse as _up + try: + existing = self._rest_get( + f"{self.REST_TAGS_ENDPOINT}?slug={_up.quote(self.CODENAMEONE_TAG_SLUG)}", + cookie_header, + nonce, + ) + if isinstance(existing, list) and existing: + return existing[0].get("id") + created = self._rest_post( + self.REST_TAGS_ENDPOINT, + cookie_header, + nonce, + {"name": self.CODENAMEONE_TAG_NAME, "slug": self.CODENAMEONE_TAG_SLUG}, + ) + return created.get("id") + except Exception as err: # noqa: BLE001 — tag is best-effort + print(f" [foojay] tag resolve/create failed (non-fatal): {err}", file=sys.stderr) + return None + + def _upload_featured_media(self, cookie_header: str, nonce: str, + image_url: str, title: str) -> int: + """Download the cover image and POST it into WP's media library.""" + import urllib.request as _ur + # Download bytes + req = _ur.Request(image_url, headers={"User-Agent": _UA_STR}) + with _ur.urlopen(req, timeout=120) as resp: + image_bytes = resp.read() + content_type = resp.headers.get("Content-Type", "image/jpeg") + filename = image_url.rsplit("/", 1)[-1].split("?", 1)[0] or "cover.jpg" + + upload = _ur.Request(self.REST_MEDIA_ENDPOINT, data=image_bytes, method="POST") + upload.add_header("Content-Type", content_type) + upload.add_header("Content-Disposition", f'attachment; filename="{filename}"') + upload.add_header("X-WP-Nonce", nonce) + upload.add_header("Cookie", cookie_header) + upload.add_header("User-Agent", _UA_STR) + with _ur.urlopen(upload, timeout=120) as response: + data = json.loads(response.read().decode("utf-8")) + media_id = data.get("id") + if not media_id: + raise RuntimeError(f"media upload returned no id: {str(data)[:200]}") + # Set a friendlier title on the media item. + try: + self._rest_post( + f"{self.REST_MEDIA_ENDPOINT}/{media_id}", cookie_header, nonce, + {"title": title, "alt_text": title}, + ) + except Exception: # noqa: BLE001 + # The media-item title and alt text are cosmetic — the upload + # itself already succeeded and the post will reference the + # returned media id regardless. Swallow any follow-up rename + # error so the caller still gets the upload result. + pass + return media_id + + def _set_yoast_meta_via_xmlrpc(self, post_id: int, canonical: str, + seo_title: str, metadesc: str) -> None: + """Update Yoast SEO post meta via XML-RPC's wp.editPost custom_fields. + + REST silently drops these meta keys (not registered for REST writes) + and the wp-admin form-submit path is challenged by Cloudflare. + XML-RPC accepts underscore-prefixed meta keys via custom_fields and + is not Cloudflare-protected on foojay. 
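+
+        On the wire, the custom_fields parameter looks roughly like this
+        (illustrative; assembled by ``cf_member`` below, URL is a placeholder):
+
+            <member><name>custom_fields</name><value><array><data>
+              <value><struct>
+                <member><name>key</name><value><string>_yoast_wpseo_canonical</string></value></member>
+                <member><name>value</name><value><string>https://example.com/original-post</string></value></member>
+              </struct></value>
+            </data></array></value></member>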
+ """ + import urllib.error as _ue + import urllib.request as _ur + import xml.sax.saxutils as _su + + user = os.environ["FOOJAY_USER"] + pwd = os.environ["FOOJAY_PASSWORD"] + + def cf_member(key: str, value: str) -> str: + return ( + "" + f"key{_su.escape(key)}" + f"value{_su.escape(value)}" + "" + ) + + custom_fields_xml = "".join([ + cf_member("_yoast_wpseo_canonical", canonical), + cf_member("_yoast_wpseo_title", seo_title), + cf_member("_yoast_wpseo_metadesc", metadesc), + ]) + envelope = ( + '' + 'wp.editPost' + "1" + f"{_su.escape(user)}" + f"{_su.escape(pwd)}" + f"{int(post_id)}" + "" + f"custom_fields{custom_fields_xml}" + "" + "" + ) + req = _ur.Request( + self.XMLRPC_ENDPOINT, + data=envelope.encode("utf-8"), + method="POST", + ) + req.add_header("Content-Type", "text/xml") + req.add_header("User-Agent", _UA_STR) + try: + with _ur.urlopen(req, timeout=60) as response: + body = response.read().decode("utf-8", errors="replace") + except _ue.HTTPError as err: + detail = err.read().decode("utf-8", errors="replace") + raise RuntimeError(f"xmlrpc HTTP {err.code}: {detail}") from err + if "" in body: + raise RuntimeError(f"xmlrpc fault: {body[:500]}") + if "1" not in body: + raise RuntimeError(f"xmlrpc unexpected response: {body[:500]}") + + def _rest_get(self, url: str, cookie_header: str, nonce: str) -> Any: + import urllib.request as _ur + req = _ur.Request(url, method="GET") + req.add_header("Accept", "application/json") + req.add_header("X-WP-Nonce", nonce) + req.add_header("Cookie", cookie_header) + req.add_header("User-Agent", _UA_STR) + with _ur.urlopen(req, timeout=60) as response: + return json.loads(response.read().decode("utf-8")) + + def _rest_post(self, url: str, cookie_header: str, nonce: str, + payload: dict[str, Any]) -> dict[str, Any]: + import urllib.error as _ue + import urllib.request as _ur + req = _ur.Request(url, data=json.dumps(payload).encode("utf-8"), method="POST") + req.add_header("Content-Type", "application/json") + req.add_header("Accept", "application/json") + req.add_header("X-WP-Nonce", nonce) + req.add_header("Cookie", cookie_header) + req.add_header("User-Agent", _UA_STR) + try: + with _ur.urlopen(req, timeout=120) as response: + raw = response.read().decode("utf-8") + except _ue.HTTPError as err: + detail = err.read().decode("utf-8", errors="replace") + raise AdapterError(f"REST POST {url} failed HTTP {err.code}: {detail}") from err + return json.loads(raw) if raw else {} + + +ADAPTERS: dict[str, Callable[[], Any]] = { + "foojay": FoojayAdapter, +} + + +# --------------------------------------------------------------------------- # +# Driver # +# --------------------------------------------------------------------------- # + + +def run_adapter(adapter, post: Post, body_markdown: str, headed: bool, validate_only: bool) -> dict[str, Any]: + from playwright.sync_api import sync_playwright + + with sync_playwright() as pw: + launch_kwargs: dict[str, Any] = {"headless": not headed} + browser = pw.chromium.launch(**launch_kwargs) + context_kwargs: dict[str, Any] = { + "viewport": {"width": 1400, "height": 900}, + "user_agent": _UA_STR, + } + context = browser.new_context(**context_kwargs) + # Grant clipboard access so navigator.clipboard.writeText() succeeds. + try: + context.grant_permissions(["clipboard-read", "clipboard-write"]) + except Exception: # noqa: BLE001 + # Firefox and WebKit reject the chromium-only clipboard-* perms. 
+ # Adapters that need the clipboard fall back to other paths + # (Quill API, Froala API, execCommand insertHTML), so a refusal + # here is non-fatal. + pass + + page = context.new_page() + ctx = AdapterContext(post=post, body_markdown=body_markdown, headed=headed, validate_only=validate_only) + + try: + adapter.login(page) + result = adapter.submit_draft(page, ctx) + except Exception as err: # noqa: BLE001 + shot = _save_screenshot(page, post.slug, f"{adapter.name}-error") + raise AdapterError(f"{adapter.name} flow failed (screenshot: {shot}): {err}") from err + finally: + context.close() + browser.close() + return result + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--platforms", default=DEFAULT_PLATFORMS, + help=f"Comma-separated platforms (default: {DEFAULT_PLATFORMS}).") + parser.add_argument("--dry-run", action="store_true", + help="No browser launched; just print what would happen.") + parser.add_argument("--headed", action="store_true", + help="Run with a visible browser (for local debugging).") + parser.add_argument("--validate-only", action="store_true", + help="Log in and open the editor, then screenshot and exit without submitting.") + parser.add_argument("--today", default=None, help="Override today's date (YYYY-MM-DD).") + parser.add_argument("--floor", default=ELIGIBILITY_FLOOR.isoformat(), + help=f"Posts must be dated strictly after this date (default: {ELIGIBILITY_FLOOR.isoformat()}).") + parser.add_argument("--min-age-days", type=int, default=MIN_AGE_DAYS, + help=f"Minimum post age in days (default: {MIN_AGE_DAYS}).") + parser.add_argument("--blog-dir", default=str(BLOG_DIR)) + parser.add_argument("--state-file", default=str(STATE_FILE)) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + today = dt.date.fromisoformat(args.today) if args.today else dt.date.today() + floor = dt.date.fromisoformat(args.floor) + requested = [p.strip() for p in args.platforms.split(",") if p.strip()] + blog_dir = Path(args.blog_dir) + state_file = Path(args.state_file) + + unknown = [p for p in requested if p not in ADAPTERS] + if unknown: + print(f"Unknown platform(s): {unknown}. Known: {sorted(ADAPTERS)}", file=sys.stderr) + return 1 + + adapters: list[Any] = [] + for name in requested: + adapter = ADAPTERS[name]() + if args.dry_run or args.validate_only or adapter.is_configured(): + adapters.append(adapter) + else: + print(f"[{name}] credentials not configured; skipping platform.") + + if not adapters: + print("No browser platforms are configured; nothing to do.") + return 0 + + posts = discover_posts(blog_dir) + state = State.load(state_file) + platform_names = [a.name for a in adapters] + candidate = select_candidate(posts, state, platform_names, today, floor, args.min_age_days) + if candidate is None and not args.validate_only: + print("No syndication candidate found today.") + return 0 + if candidate is None and args.validate_only: + # In validate-only mode, fall back to the newest post so we can still + # verify selectors even when nothing is technically due. 
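+        # (Assumes discover_posts() yields posts sorted by date ascending,
+        # so the last entry is the newest one.)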
+ candidate = posts[-1] + print(f"validate-only: using newest post {candidate.slug} for selector verification.") + + print(f"Selected post: {candidate.slug} (date={candidate.date.isoformat()})") + body_markdown = render_syndicated_body(candidate) + + any_change = False + failures: list[str] = [] + + for adapter in adapters: + if state.is_syndicated(candidate.slug, adapter.name) and not args.validate_only: + print(f" [{adapter.name}] already syndicated; skipping.") + continue + if args.dry_run: + print(f" [{adapter.name}] dry run — would publish {len(body_markdown)} chars, " + f"canonical {candidate.canonical_url}") + continue + try: + result = run_adapter(adapter, candidate, body_markdown, args.headed, args.validate_only) + except Exception as err: # noqa: BLE001 + print(f" [{adapter.name}] FAILED: {err}", file=sys.stderr) + failures.append(adapter.name) + continue + + if args.validate_only: + print(f" [{adapter.name}] validated. {json.dumps(result)}") + continue + + if not result.get("url"): + print(f" [{adapter.name}] response missing URL: {result}", file=sys.stderr) + failures.append(adapter.name) + continue + + state.record(candidate.slug, adapter.name, result) + any_change = True + print(f" [{adapter.name}] published draft: {result['url']}") + + if any_change: + state.save(state_file) + print(f"Updated state file: {state_file}") + + if failures: + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/scripts/website/syndication-queue.json b/scripts/website/syndication-queue.json new file mode 100644 index 0000000000..8779b9376b --- /dev/null +++ b/scripts/website/syndication-queue.json @@ -0,0 +1,4 @@ +{ + "_comment": "Queue of browser-only syndication tasks consumed by the Codename One Syndicator Firefox extension. The extension polls https://raw.githubusercontent.com/codenameone/CodenameOne/master/scripts/website/syndication-queue.json and processes any task whose id is not already in the user's local 'completed_tasks' list. Tasks are appended by scripts/website/queue_browser_syndication.py which is invoked by the daily blog-syndication workflow.", + "tasks": [] +} diff --git a/scripts/website/syndication-state.json b/scripts/website/syndication-state.json new file mode 100644 index 0000000000..ac96b53ff4 --- /dev/null +++ b/scripts/website/syndication-state.json @@ -0,0 +1,17 @@ +{ + "_comment": "Tracks blog posts syndicated by scripts/website/syndicate_blog_posts.py and syndicate_browser_posts.py. Keyed by post slug. Each platform sub-object records the remote URL/id and ISO timestamp once syndication succeeds.", + "posts": { + "liquid-glass-material-3-modern-native-themes": { + "devto": { + "id": 3620800, + "url": "https://dev.to/codenameone/liquid-glass-material-3-and-a-lot-of-plumbing-2jkk", + "syndicated_at": "2026-05-06T13:08:18+00:00" + }, + "hashnode": { + "id": "69fb2f0263ebe40f84df66db", + "url": "https://debugagent.com/liquid-glass-material-3-and-a-lot-of-plumbing", + "syndicated_at": "2026-05-06T12:24:00+00:00" + } + } + } +}