Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions definitions/output/crawl/pages.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ assert('corrupted_technology_values')
.tags(['crawl_complete'])
.query(ctx => `
SELECT
date,
client,
tech,
COUNT(DISTINCT page) AS cnt_pages,
ARRAY_AGG(DISTINCT page LIMIT 3) AS sample_pages
/*
date,
client,
tech,
ARRAY_AGG(DISTINCT page LIMIT 3) AS sample_pages,
*/
COUNT(DISTINCT page) AS cnt_pages
FROM ${ctx.ref('crawl_staging', 'pages')} AS pages
LEFT JOIN pages.technologies AS tech
LEFT JOIN tech.categories AS category
Expand All @@ -18,11 +20,14 @@ WHERE
OR category NOT IN (SELECT DISTINCT name FROM wappalyzer.categories)
OR ARRAY_LENGTH(tech.categories) = 0
)
/*
GROUP BY
date,
client,
tech
ORDER BY cnt_pages DESC
*/
HAVING cnt_pages > 200
`)

publish('pages', {
Expand Down