Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
60aea08
Article-mode oEmbed extraction for video pages + V1 payload parity
wikirby May 14, 2026
9bc0549
Fix article-mode highlighter under sandboxed preview iframe
wikirby May 14, 2026
ca6ac23
Roomier sidebar group spacing; bump window height cap to fit
wikirby May 14, 2026
a292926
Revert window-height cap bump (keep sidebar spacing change)
wikirby May 14, 2026
fe40b3d
Restore Segoe UI @font-face for Mac/Linux users
wikirby May 14, 2026
be5b15c
Selection mode + ContextImage parity + cross-mode highlight preservation
wikirby May 17, 2026
21914ae
oEmbed article description: use Readability excerpt for V1 parity
wikirby May 17, 2026
6af8a0d
Mode selector: collapse 5-6 stacked buttons into a Fluent 2 Dropdown
wikirby May 17, 2026
4db0d29
Skip eager full-page capture on context-menu invocations
wikirby May 18, 2026
70b0fb1
Register contextMenus.onClicked synchronously on worker startup
wikirby May 18, 2026
59a4f25
Open in Article mode on known oEmbed pages (toolbar invoke)
wikirby May 18, 2026
d343f64
Remove unused "storage" permission from manifests
wikirby May 18, 2026
ea8f1d9
Fix three issues surfaced by dropdown + deferred-capture work
wikirby May 18, 2026
2c9d42b
Revert "Mode selector: collapse 5-6 stacked buttons into a Fluent 2 Dropdown"
wikirby May 19, 2026
7b84921
PDF attach: visual indicator at top of preview when checkbox is on
wikirby May 19, 2026
6936290
Remove unused "cookies" permission from Edge manifest
wikirby May 19, 2026
6566499
Refresh user state on renderer open (V1 parity)
wikirby May 19, 2026
2529b97
Address Aanchal's review feedback
wikirby May 20, 2026
2dd9340
Trim verbose comments across PR-touched files
wikirby May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/renderer.html
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ <h2 class="signin-title">OneNote Web Clipper</h2>
<button class="mode-btn" data-mode="pdf" aria-pressed="false" style="display:none;">
<img class="mode-icon-img" src="images/pdf.png" alt=""><span>PDF</span>
</button>
<button class="mode-btn" data-mode="selection" aria-pressed="false" style="display:none;">
<svg class="mode-icon" width="20" height="20" viewBox="0 0 20 20" fill="none" aria-hidden="true"><path d="M3 5.25C3 4.55964 3.55964 4 4.25 4C4.6836 4 5.06594 4.22035 5.29087 4.5575C5.44413 4.78722 5.75458 4.8492 5.9843 4.69594C6.21401 4.54269 6.27599 4.23223 6.12273 4.00252C5.72021 3.39918 5.03182 3 4.25 3C3.00736 3 2 4.00736 2 5.25C2 6.49264 3.00736 7.5 4.25 7.5C4.27225 7.5 4.29416 7.49855 4.31565 7.49573C4.33715 7.49855 4.35908 7.50001 4.38135 7.50001L15.5 7.5C15.7761 7.5 16 7.27614 16 7C16 6.72386 15.7761 6.5 15.5 6.5L4.38135 6.50001C4.3591 6.50001 4.33718 6.50146 4.3157 6.50428C4.2942 6.50146 4.27227 6.5 4.25 6.5C3.55964 6.5 3 5.94036 3 5.25ZM8.49988 4C8.22374 4 7.99988 4.22386 7.99988 4.5C7.99988 4.77614 8.22374 5 8.49988 5H17.4999C17.776 5 17.9999 4.77614 17.9999 4.5C17.9999 4.22386 17.776 4 17.4999 4H8.49988ZM5.50055 9C5.22441 9 5.00055 9.22386 5.00055 9.5C5.00055 9.77614 5.22441 10 5.50055 10H11.3369C12.2316 9.36997 13.3226 9 14.5 9H5.50055ZM2.5 11.5H9.88947C9.68555 11.8128 9.51259 12.1475 9.37494 12.5H2.5C2.22386 12.5 2 12.2761 2 12C2 11.7239 2.22386 11.5 2.5 11.5ZM5.50055 14H9.02242C9.00758 14.1647 9 14.3315 9 14.5C9 14.6685 9.00758 14.8353 9.02242 15H5.50055C5.22441 15 5.00055 14.7761 5.00055 14.5C5.00055 14.2239 5.22441 14 5.50055 14ZM19 14.5C19 16.9853 16.9853 19 14.5 19C12.0147 19 10 16.9853 10 14.5C10 12.0147 12.0147 10 14.5 10C16.9853 10 19 12.0147 19 14.5ZM15 12.5C15 12.2239 14.7761 12 14.5 12C14.2239 12 14 12.2239 14 12.5V14H12.5C12.2239 14 12 14.2239 12 14.5C12 14.7761 12.2239 15 12.5 15H14V16.5C14 16.7761 14.2239 17 14.5 17C14.7761 17 15 16.7761 15 16.5V15H16.5C16.7761 15 17 14.7761 17 14.5C17 14.2239 16.7761 14 16.5 14H15V12.5Z" fill="currentColor"/></svg><span>Selection</span>
</button>
</div>
<div id="pdf-options" style="display:none;" role="group" aria-label="PDF options">
<div id="pdf-page-selection" role="radiogroup" aria-label="Page selection">
Expand Down Expand Up @@ -151,6 +154,7 @@ <h2 class="signin-title">OneNote Web Clipper</h2>
</div>
<div id="aria-status" class="sr-only" aria-live="polite" aria-atomic="true"></div>
<script src="pdf.combined.js"></script>
<script src="textHighlighter.js"></script>
<script src="renderer.js"></script>
</body>
</html>
122 changes: 122 additions & 0 deletions src/scripts/contentCapture/oembedExtractor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// oEmbed-based extraction for rich-media pages. Caller falls back to Readability on null.

/** One row of the provider table: which oEmbed endpoint serves pages on a given host. */
interface OEmbedProvider {
// Provider display name; tryOEmbed uses it as the fallback provider_name when the response has none.
name: string;
// Base oEmbed endpoint URL; tryOEmbed appends the url/format/maxwidth query string to it.
endpoint: string;
// Host (or host + path) pattern that matchProvider compares, lower-cased, against the page URL.
hostPattern: string;
}

/**
 * Payload returned by tryOEmbed: the provider's parsed JSON response with
 * provider_name defaulted and pageUrl added. tryOEmbed only returns payloads
 * whose type is "video", "rich" or "photo".
 */
export interface OEmbedData {
type: string; // "video" | "photo" | "link" | "rich"
html?: string; // present for video / rich
url?: string; // present for photo
width?: number;
height?: number;
title?: string;
author_name?: string;
thumbnail_url?: string;
provider_name?: string; // filled from the matched provider's name when the response omits it
pageUrl: string; // echo of the page URL we matched against
}

// V1 parity: YouTube + Vimeo. (Khan Academy V1 entry scraped embedded YouTube iframes,
// which our YouTube provider already handles.)
const PROVIDERS: OEmbedProvider[] = [
  { name: "YouTube", endpoint: "https://www.youtube.com/oembed", hostPattern: "youtube.com" },
  { name: "YouTube", endpoint: "https://www.youtube.com/oembed", hostPattern: "youtu.be" },
  { name: "Vimeo", endpoint: "https://vimeo.com/api/oembed.json", hostPattern: "vimeo.com" },
];

/**
 * Returns the first PROVIDERS entry whose hostPattern matches `url`, or null
 * when the URL cannot be parsed or no pattern matches. Matching is
 * case-insensitive and the pattern shape selects the rule:
 *   - contains "/":  the host + path starts with the pattern, or contains "." + pattern
 *   - ends with ".": the host starts with the pattern
 *   - otherwise:     the host equals the pattern or is a subdomain of it
 *
 * @param url Absolute page URL.
 * @returns The matching provider, or null.
 */
function matchProvider(url: string): OEmbedProvider | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch (e) {
    return null;
  }
  const host = parsed.hostname.toLowerCase();
  const hostAndPath = (host + parsed.pathname).toLowerCase();

  for (const provider of PROVIDERS) {
    const pattern = provider.hostPattern.toLowerCase();

    if (pattern.indexOf("/") !== -1) {
      // A prefix match also covers the exact-equality case.
      if (hostAndPath.indexOf(pattern) === 0 || hostAndPath.indexOf("." + pattern) !== -1) {
        return provider;
      }
    } else if (pattern.charAt(pattern.length - 1) === ".") {
      if (host.indexOf(pattern) === 0) {
        return provider;
      }
    } else {
      // Genuine ends-with test. Comparing indexOf() against a computed offset is wrong:
      // for an unrelated host of the same length as the pattern both sides are -1, so
      // "example.com" used to match "youtube.com". slice() on a host shorter than the
      // suffix yields the whole host, which can never equal the longer suffix.
      const suffix = "." + pattern;
      if (host === pattern || host.slice(-suffix.length) === suffix) {
        return provider;
      }
    }
  }
  return null;
}

// Attributes a browser navigates to or loads as a URL; a script-scheme value there is executable.
const URL_BEARING_ATTRS = ["href", "src", "xlink:href", "action", "formaction"];

// True when a browser would read `value` as a javascript: URL. URL parsing drops
// leading control/space characters and any embedded tab/newline before the scheme is
// read, so "java\tscript:" is equivalent to "javascript:" and must be caught as well.
function isScriptUrl(value: string): boolean {
  return /^javascript:/i.test(value.replace(/[\u0000-\u0020]+/g, ""));
}

/**
 * Strips executable surfaces from provider HTML; preserves iframes/anchors/images.
 *
 * Removed: <script>, <object>, <embed>, <link>, <style> and <meta> elements; every
 * attribute whose name starts with "on"; `srcdoc` (inline document markup that can
 * carry script into a preserved iframe); and URL-bearing attributes whose value is a
 * javascript: URL.
 *
 * @param html Untrusted HTML fragment from an oEmbed response.
 * @returns The sanitized body markup, or "" when the parsed document has no body.
 */
export function sanitizeProviderHtml(html: string): string {
  const doc = new DOMParser().parseFromString(html, "text/html");

  const removable = doc.querySelectorAll("script, object, embed, link, style, meta");
  for (let i = removable.length - 1; i >= 0; i--) {
    const el = removable[i];
    if (el.parentNode) { el.parentNode.removeChild(el); }
  }

  const all = doc.querySelectorAll("*");
  for (let i = 0; i < all.length; i++) {
    const el = all[i];
    const attrs = el.attributes;
    // Walk backwards: removing an attribute shifts the ones after it.
    for (let j = attrs.length - 1; j >= 0; j--) {
      const attrName = attrs[j].name;
      const lowered = attrName.toLowerCase();
      const executable = lowered.indexOf("on") === 0
        || lowered === "srcdoc"
        || (URL_BEARING_ATTRS.indexOf(lowered) !== -1 && isScriptUrl(attrs[j].value));
      if (executable) {
        el.removeAttribute(attrName);
      }
    }
  }

  return doc.body ? doc.body.innerHTML : "";
}

// Synchronous check (no network request): does this page URL belong to a known oEmbed provider?
export function isOEmbedProviderUrl(pageUrl: string): boolean {
  const provider = matchProvider(pageUrl);
  return provider !== null;
}

/**
 * Fetches the oEmbed payload for a page hosted by a known provider.
 *
 * @param pageUrl URL of the page being clipped.
 * @returns The payload (provider_name defaulted, pageUrl attached), or null when the
 *          URL is empty, no provider matches, the request fails, or the response type
 *          is not "video", "rich" or "photo".
 */
export async function tryOEmbed(pageUrl: string): Promise<OEmbedData | null> {
  const provider = pageUrl ? matchProvider(pageUrl) : null;
  if (!provider) { return null; }

  const query = "?url=" + encodeURIComponent(pageUrl) + "&format=json&maxwidth=600";

  try {
    const response = await fetch(provider.endpoint + query);
    if (!response.ok) { return null; }

    const payload = await response.json() as Partial<OEmbedData>;
    switch (payload.type) {
      case "video":
      case "rich":
      case "photo":
        break;
      default:
        // "link" type carries metadata only; let Readability handle it.
        return null;
    }

    payload.provider_name = payload.provider_name || provider.name;
    payload.pageUrl = pageUrl;
    return payload as OEmbedData;
  } catch (e) {
    // Network or parse failure: the caller falls back on null.
    return null;
  }
}
3 changes: 1 addition & 2 deletions src/scripts/extensions/chrome/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
"tabs",
"webRequest",
"webNavigation",
"offscreen",
"storage"
"offscreen"
],
"host_permissions": [
"<all_urls>"
Expand Down
40 changes: 39 additions & 1 deletion src/scripts/extensions/contentCaptureInject.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,50 @@
let contentType = detectContentType();
let html = resolveLazyImages(getDomString(doc));

// Serializes the current selection through the same DOM-cleanup helpers as the page capture.
// img/a URLs are written back in resolved form first so they survive the base-tag strip.
// Returns "" when there is no non-collapsed selection or when any step throws.
function captureSelectionHtml(): string {
  try {
    const selection = window.getSelection();
    if (!selection || selection.rangeCount === 0) { return ""; }

    const selectedRange = selection.getRangeAt(0);
    if (selectedRange.collapsed) { return ""; }

    const workingDoc = cloneDocument(document);
    if (!workingDoc.body) { return ""; }

    // Empty the cloned body, then insert a copy of just the selected nodes.
    for (let child = workingDoc.body.firstChild; child; child = workingDoc.body.firstChild) {
      workingDoc.body.removeChild(child);
    }
    const fragment = workingDoc.importNode(selectedRange.cloneContents(), true);
    workingDoc.body.appendChild(fragment);

    addBaseTagIfNecessary(workingDoc, document.location);
    addImageSizeInformationToDom(workingDoc);
    removeUnwantedItems(workingDoc);

    // Only img.src + a.href need materializing; other URL-bearing tags are stripped downstream.
    const targets = [
      { selector: "img[src]", urlAttr: "src" },
      { selector: "a[href]", urlAttr: "href" }
    ];
    for (let t = 0; t < targets.length; t++) {
      const matches = workingDoc.querySelectorAll(targets[t].selector);
      for (let i = 0; i < matches.length; i++) {
        const node = matches[i] as HTMLElement;
        // The DOM property holds the resolved URL; write it back over the raw attribute.
        const resolvedUrl = (node as any)[targets[t].urlAttr];
        if (resolvedUrl && typeof resolvedUrl === "string") {
          node.setAttribute(targets[t].urlAttr, resolvedUrl);
        }
      }
    }

    return workingDoc.body.innerHTML;
  } catch (e) {
    return "";
  }
}
// Resolve lazy images on the selection branch too, else lazy-loaded <img> lands with no src.
let selectionHtml = resolveLazyImages(captureSelectionHtml());

chrome.runtime.sendMessage(JSON.stringify({
action: "contentCaptureComplete",
html: html,
baseUrl: document.baseURI || document.URL,
title: document.title || "",
url: document.URL || "",
contentType: contentType
contentType: contentType,
selectionHtml: selectionHtml
}));
})();
4 changes: 1 addition & 3 deletions src/scripts/extensions/edge/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@
"activeTab",
"scripting",
"contextMenus",
"cookies",
"tabs",
"webRequest",
"webNavigation",
"offscreen",
"storage"
"offscreen"
],

"host_permissions": [
Expand Down
75 changes: 43 additions & 32 deletions src/scripts/extensions/webExtensionBase/webExtension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ export class WebExtension extends ExtensionBase<WebExtensionWorker, W3CTab, numb

this.registerBrowserButton();

// Listener registers synchronously at SW startup; menu items themselves need locStrings.
this.registerContextMenuClickListener();
this.clipperIdProcessed.then(() => {
this.registerContextMenuItems();
});
Expand Down Expand Up @@ -184,43 +186,52 @@ export class WebExtension extends ExtensionBase<WebExtensionWorker, W3CTab, numb
menus[i].documentUrlPatterns = documentUrlPatternList;
}
WebExtension.browser.contextMenus.create(menus[i]);
WebExtension.browser.contextMenus.onClicked.addListener((info, tab: W3CTab) => {
switch (info.menuItemId) {
case "WebClipper.Label.OneNoteWebClipper":
this.invokeClipperInTab(tab, { invokeSource: InvokeSource.ContextMenu }, { invokeMode: InvokeMode.Default });
break;
case "WebClipper.Label.ClipSelectionToOneNote":
let invokeOptions: InvokeOptions = { invokeMode: InvokeMode.ContextTextSelection };

// If the tab index is negative, chances are the user is using some sort of PDF plugin,
// and the tab object will be invalid. We need to get the parent tab in this scenario.
if (tab.index < 0) {
// Since we are in a PDF plugin, Rangy won't work, so we rely on WebExtension API to grab pure text
invokeOptions.invokeDataForMode = info.selectionText;
WebExtension.browser.tabs.query({ active: true, currentWindow: true }, (tabs: W3CTab[]) => {
// There will only be one tab that meets this criteria
let parentTab = tabs[0];
this.invokeClipperInTab(parentTab, { invokeSource: InvokeSource.ContextMenu }, invokeOptions);
});
} else {
this.invokeClipperInTab(tab, { invokeSource: InvokeSource.ContextMenu }, invokeOptions);
}
break;
case "WebClipper.Label.ClipImageToOneNote":
// Even though we know the user right-clicked an image, srcUrl is only present if the src attr exists
this.invokeClipperInTab(tab, { invokeSource: InvokeSource.ContextMenu }, info.srcUrl ? {
// srcUrl will always be the full url, not relative
invokeDataForMode: info.srcUrl, invokeMode: InvokeMode.ContextImage
} : undefined);
break;
default:
}
});
}
});
});
}

// Registers the contextMenus.onClicked dispatcher. It branches on info.menuItemId only,
// so it has no locStrings dependency. See constructor for why this is split out.
private registerContextMenuClickListener() {
  WebExtension.browser.contextMenus.onClicked.addListener((info, tab?: Tab) => {
    if (!tab) {
      return;
    }
    const targetTab = tab as W3CTab;
    const menuItemId = info.menuItemId;

    if (menuItemId === "WebClipper.Label.OneNoteWebClipper") {
      this.invokeClipperInTab(targetTab, { invokeSource: InvokeSource.ContextMenu }, { invokeMode: InvokeMode.Default });
      return;
    }

    if (menuItemId === "WebClipper.Label.ClipSelectionToOneNote") {
      const selectionOptions: InvokeOptions = { invokeMode: InvokeMode.ContextTextSelection };

      // If the tab index is negative, chances are the user is using some sort of PDF plugin,
      // and the tab object will be invalid. We need to get the parent tab in this scenario.
      if (targetTab.index < 0) {
        // Since we are in a PDF plugin, Rangy won't work, so we rely on WebExtension API to grab pure text
        selectionOptions.invokeDataForMode = info.selectionText;
        WebExtension.browser.tabs.query({ active: true, currentWindow: true }, (tabs: Tab[]) => {
          // There will only be one tab that meets this criteria
          const parentTab = tabs[0] as W3CTab;
          this.invokeClipperInTab(parentTab, { invokeSource: InvokeSource.ContextMenu }, selectionOptions);
        });
      } else {
        this.invokeClipperInTab(targetTab, { invokeSource: InvokeSource.ContextMenu }, selectionOptions);
      }
      return;
    }

    if (menuItemId === "WebClipper.Label.ClipImageToOneNote") {
      // Even though we know the user right-clicked an image, srcUrl is only present if the src attr exists
      let imageOptions: InvokeOptions;
      if (info.srcUrl) {
        // srcUrl will always be the full url, not relative
        imageOptions = { invokeDataForMode: info.srcUrl, invokeMode: InvokeMode.ContextImage };
      } else {
        imageOptions = { invokeMode: InvokeMode.Default };
      }
      this.invokeClipperInTab(targetTab, { invokeSource: InvokeSource.ContextMenu }, imageOptions);
    }
    // Any other menu item id is ignored.
  });
}

private registerInstallListener() {
// onInstalled is undefined as of Firefox 48
if (this.onInstalledSupported()) {
Expand Down
Loading
Loading