chore(docs): add Gulp tasks

This commit is contained in:
Greg Annandale
2026-03-31 11:32:21 +01:00
parent 4b921fbccd
commit beb7a14e28
+473 -75
View File
@@ -1,11 +1,12 @@
import {execSync} from 'child_process';
import {Extractor} from 'markdown-tables-to-json';
import * as fs from 'fs';
import * as path from 'path';
import * as gulp from 'gulp';
import header from 'gulp-header';
import replace from 'gulp-replace';
import rename from 'gulp-rename';
// Directory holding the generated API reference pages; the tasks below read,
// rewrite and delete the .md/.mdx files found here.
const DOCS_DIR = 'docs/docs/reference/js';
// Directory that receives the generated `_reference.js` sidebar module.
const REFERENCE_SIDEBAR_DIR = 'docs/docs/reference';
/**
* Run API Extractor to generate the intermediate json file.
@@ -41,15 +42,323 @@ const generateDocs = function(done) {
}
/**
* Prepends the project and book metadata that devsite requires.
* Extracts the title from the H2 heading in the content.
* Falls back to filename-based title if H2 not found.
*/
const prependBook = function() {
return gulp.src('docs/*.md')
.pipe(header(
'Project: /blockly/_project.yaml\nBook: /blockly/_book.yaml\n\n'))
/**
 * Derives a page title from the first H2 heading found in the page content.
 *
 * Frontmatter and MDX comments are stripped before searching. Markdown links
 * and inline-code backticks are removed from the heading text, and for dotted
 * headings only the text after the last dot is kept, e.g.
 * "BlocklyOptions.comments property" -> "comments property".
 *
 * @param {string} content Page content (markdown/MDX).
 * @param {string} filename File name, used only for the fallback title.
 * @returns {string} The heading-derived title, or `extractTitle(filename)` when
 *     the content contains no H2 heading.
 */
const extractTitleFromContent = function(content, filename) {
  const body = content
      // Remove frontmatter if it exists.
      .replace(/^---[\s\S]*?---\n\n/, '')
      // Remove MDX comments.
      .replace(/\{\/\*[\s\S]*?\*\/\}/g, '');

  // Anchor to the start of a line and require exactly two hashes, so that
  // "### ..." headings and a "##" in the middle of a line are not mistaken
  // for the page heading.
  const headingMatch = body.match(/^##[ \t]+(.+)$/m);
  if (!headingMatch) {
    // Fallback to filename-based title.
    return extractTitle(filename);
  }

  const heading = headingMatch[1]
      .trim()
      // Keep the link text, drop the target.
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
      // Drop inline code backticks.
      .replace(/`([^`]+)`/g, '$1');

  // Keep only what follows the last dot; without a dot lastIndexOf yields -1
  // and the whole heading is returned.
  return heading.slice(heading.lastIndexOf('.') + 1);
};
/**
 * Builds a human-readable title from a generated reference file name.
 *
 * Examples:
 *   "blockly.block_class" -> "Block class"
 *   "blockly.block_class.addicon_1_method" -> "Addicon method"
 *   "blockly.block_class._constructor__1_constructor" -> "Constructor"
 *
 * @param {string} filename File name, with or without a trailing .md/.mdx.
 * @returns {string} The derived title.
 */
const extractTitle = function(filename) {
  const capitalize = (text) => text.charAt(0).toUpperCase() + text.slice(1);

  // Strip only a trailing extension. Removing the first ".md"/".mdx" found
  // anywhere would corrupt names such as "blockly.mdutil_class.mdx".
  const nameWithoutExt = filename.replace(/\.mdx?$/, '');
  const parts = nameWithoutExt.split('.');

  if (parts.length === 2) {
    // Top-level page: blockly.block_class -> "Block class".
    const kindPattern = /_(class|namespace|interface|enum|type|variable)$/;
    const kind = parts[1].match(kindPattern);
    // Capitalize every underscore-separated word of the bare name.
    const words = parts[1].replace(kindPattern, '').split('_').map(capitalize);
    // The kind suffix is appended unchanged (lower case).
    if (kind) {
      words.push(kind[1]);
    }
    return words.join(' ');
  }

  if (parts.length > 2) {
    // Sub-page: blockly.block_class.addicon_1_method -> "Addicon method".
    const member = parts[parts.length - 1];
    // Must be tested before the generic suffix rewrite below: that rewrite
    // also matches "_1_constructor" and would leave "_constructor_ constructor".
    if (/^_constructor__\d+_constructor$/.test(member)) {
      return 'Constructor';
    }
    const name = member
        // Turn the "_<n>_<kind>" suffix into " <kind>".
        .replace(/_\d+_(method|property|constructor|function|variable)$/, ' $1')
        // Double underscores become spaces; single underscores are kept.
        .replace(/__/g, ' ')
        .trim();
    return capitalize(name);
  }

  // Fallback for names without a dot: capitalize the first letter.
  return capitalize(nameWithoutExt);
};
/**
 * Extracts a short description for a page from its content.
 *
 * The description is the first paragraph that directly follows the first H2
 * heading, reduced to its first sentence and capped at 160 characters. When
 * the heading is followed by something other than a paragraph (bold text such
 * as "**Signature:**", a code fence, another heading or a table), or by
 * nothing at all, a generic "Blockly - usage reference for the ..."
 * description is returned instead.
 *
 * @param {string} content Page content (markdown/MDX).
 * @param {string} filename File name, used for the fallback text when the
 *     content has no H2 heading.
 * @returns {string} The description text.
 */
const extractDescription = function(content, filename) {
  // Keep link text and inline-code text, drop the markup around them.
  const stripInlineMarkup = (text) => text
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
      .replace(/`([^`]+)`/g, '$1');

  const body = content
      // Remove frontmatter if it exists.
      .replace(/^---[\s\S]*?---\n\n/, '')
      // Remove MDX comments.
      .replace(/\{\/\*[\s\S]*?\*\/\}/g, '');

  // Anchored so that "### ..." headings and inline "##" are not picked up.
  const headingMatch = body.match(/^##[ \t]+(.+)$/m);
  if (!headingMatch) {
    return `Blockly - usage reference for the ${extractTitle(filename)}`;
  }

  const fullTitle = stripInlineMarkup(headingMatch[1].trim());
  const fallback = `Blockly - usage reference for the ${fullTitle}`;

  // Use the match offset rather than searching for the heading text again,
  // which could land on an earlier occurrence of the same text.
  const afterHeading =
      body.slice(headingMatch.index + headingMatch[0].length);

  // The paragraph must be the first thing after the heading. The stop markers
  // are applied to its first line as well as to continuation lines, otherwise
  // "**Signature:**" would be taken as the description of undocumented items.
  const paragraphMatch = afterHeading.match(
      /^\s*\n(?!\*\*|```|##|<table>)([^\n]+(?:\n(?!\n|\*\*|```|##|<table>)[^\n]+)*)/);
  if (!paragraphMatch) {
    return fallback;
  }

  // Clean up and collapse all whitespace (including newlines) to single spaces.
  let description =
      stripInlineMarkup(paragraphMatch[1].trim()).replace(/\s+/g, ' ').trim();
  if (!description) {
    return fallback;
  }

  // First sentence only. The terminator must be followed by whitespace or the
  // end of the text, so dots inside identifiers ("Blockly.Block") or numbers
  // ("1.2") do not end the sentence early.
  const firstSentence = description.match(/^.+?[.!?](?=\s|$)/);
  if (firstSentence) {
    description = firstSentence[0];
  }
  if (description.length > 160) {
    description = description.substring(0, 157) + '...';
  }
  return description;
};
/**
 * Prepends frontmatter (sidebar, hide_title, title, description) to every
 * .mdx file in DOCS_DIR, replacing any frontmatter block already present.
 * Files are rewritten in place.
 *
 * @param {Function} done Callback invoked once all files have been written.
 */
const prependFrontmatter = function(done) {
  for (const file of fs.readdirSync(DOCS_DIR)) {
    if (!file.endsWith('.mdx')) continue;

    const filePath = path.join(DOCS_DIR, file);
    let content = fs.readFileSync(filePath, 'utf8');

    // Remove existing frontmatter if present. The closing fence must sit on
    // its own line, so search for "\n---\n" (starting at the newline that ends
    // the opening fence) instead of any "---\n", which could match mid-line.
    if (content.startsWith('---\n')) {
      const closingFence = content.indexOf('\n---\n', 3);
      if (closingFence !== -1) {
        content =
            content.substring(closingFence + '\n---\n'.length).trim() + '\n\n';
      }
    }

    const title = extractTitleFromContent(content, file);
    const description = extractDescription(content, file);

    // JSON.stringify produces a double-quoted, escaped string for both values,
    // so quotes or backslashes in a title cannot break the YAML. For titles
    // without such characters the output is the same as plain "..." quoting.
    const frontmatter = [
      '---',
      'sidebar: referenceSidebar',
      'hide_title: true',
      `title: ${JSON.stringify(title)}`,
      `description: ${JSON.stringify(description)}`,
      '---',
      '',
      '',
    ].join('\n');

    // Write the file with frontmatter.
    fs.writeFileSync(filePath, frontmatter + content);
  }
  done();
};
/**
 * Note: the .md -> .mdx conversion for Docusaurus is performed by
 * `convertToMdx`, which is defined after `fixMdxIssues` below.
 */
/**
 * Post-processes every .mdx file in DOCS_DIR to fix problematic patterns,
 * rewriting each file in place.
 *
 * Each file is processed line by line. Lines inside ``` fenced code blocks
 * (and the fence lines themselves) are left untouched. For all other lines:
 *  - empty MDX comments are removed;
 *  - backslash escapes before `_`, `[` and `]` are removed;
 *  - lowercase `<tag>` occurrences are wrapped in backticks, unless the line
 *    contains one of the table tags checked below;
 *  - lines containing both an opening and a closing curly brace are wrapped
 *    in backticks, subject to the skip conditions below.
 *
 * @param {Function} done Callback invoked after all files have been written.
 */
const fixMdxIssues = function(done) {
const files = fs.readdirSync(DOCS_DIR).filter(f => f.endsWith('.mdx'));
for (const file of files) {
const filePath = path.join(DOCS_DIR, file);
let content = fs.readFileSync(filePath, 'utf8');
// Split content into lines for line-by-line processing
const lines = content.split('\n');
// State carried across lines: inside a ``` fence / inside a <td> that was
// opened on an earlier line and not yet closed.
let inCodeBlock = false;
let inTableCell = false;
for (let i = 0; i < lines.length; i++) {
// `line` keeps the ORIGINAL text of the line; `lines[i]` is rewritten below.
// The table-cell tracking deliberately looks at `line`.
const line = lines[i];
// Track code blocks
if (line.trim().startsWith('```')) {
inCodeBlock = !inCodeBlock;
// The fence line itself is never modified.
continue;
}
// Skip processing inside code blocks
if (inCodeBlock) continue;
// Track if we're entering a table cell (opening tag without closing on same line)
if (line.includes('<td>') && !line.includes('</td>')) {
inTableCell = true;
}
// Remove empty MDX comments
lines[i] = lines[i].replace(/\{\/\*\s*\*\/\}/g, '');
// Remove unnecessary markdown escapes for underscores and brackets
// These are not needed in MDX and can cause display issues
lines[i] = lines[i].replace(/\\_/g, '_');
lines[i] = lines[i].replace(/\\\[/g, '[');
lines[i] = lines[i].replace(/\\\]/g, ']');
// Escape standalone HTML tags (not in tables or code)
// Only lowercase, attribute-less opening tags match the pattern; closing
// tags and tags with attributes are left as they are.
if (!lines[i].includes('<table>') && !lines[i].includes('</table>') &&
!lines[i].includes('<thead>') && !lines[i].includes('<tbody>') &&
!lines[i].includes('<tr>') && !lines[i].includes('<th>') && !lines[i].includes('<td>')) {
lines[i] = lines[i].replace(/<([a-z]+)>/g, '`<$1>`');
}
// Handle curly braces ANYWHERE (in tables or outside)
// If the line has curly braces with type-like content, wrap in backticks
// `trimmed` is taken from the already-rewritten line, not from `line`.
const trimmed = lines[i].trim();
if (trimmed && (trimmed.includes('{') || trimmed.includes('\\{')) &&
(trimmed.includes('}') || trimmed.includes('\\}'))) {
// Skip if it's an MDX comment, table/code tag line, or already in backticks
// Note: `&&` binds tighter than `||`, so the last alternative reads as
// "(starts with a backtick AND ends with a backtick)".
if (trimmed.includes('{/*') || trimmed.includes('*/}') ||
trimmed.includes('<td>') || trimmed.includes('</td>') ||
trimmed.includes('<table>') || trimmed.includes('<tr>') ||
trimmed.startsWith('```') || trimmed.startsWith('`') && trimmed.endsWith('`')) {
// Process table cell content specifically
// NOTE(review): this sits inside the "skip" branch, so it only runs for
// lines that matched one of the skip conditions above while inside a
// multi-line <td>. Brace lines in a cell that match none of them take the
// `else` branch instead. Confirm this nesting is intended.
if (inTableCell && !lines[i].includes('<td>') && !lines[i].includes('</td>')) {
// Remove any existing escaping first
let cleaned = trimmed.replace(/\\\{/g, '{').replace(/\\\}/g, '}');
// Only remove trailing semicolons for nested object type patterns
if (cleaned.match(/\{\s*\[.*\]:\s*\{.*\};\s*\}/)) {
cleaned = cleaned.replace(/;\s*}/g, ' }');
}
if (!cleaned.startsWith('`') || !cleaned.endsWith('`')) {
// NOTE(review): with a string pattern, replace() substitutes only the first
// occurrence and still interprets `$` sequences in the replacement string;
// verify `cleaned` can never contain `$`.
lines[i] = lines[i].replace(trimmed, '`' + cleaned + '`');
}
}
} else {
// Not in a tag line - wrap curly brace content in backticks
let cleaned = trimmed.replace(/\\\{/g, '{').replace(/\\\}/g, '}');
// Remove trailing semicolons for type patterns
if (cleaned.match(/\{\s*\[.*\]:\s*\{.*\};\s*\}/)) {
cleaned = cleaned.replace(/;\s*}/g, ' }');
}
// Wrap in backticks
// The whole trimmed line is wrapped, not just the braced part; leading and
// trailing whitespace of the line stays outside the backticks.
lines[i] = lines[i].replace(trimmed, '`' + cleaned + '`');
}
}
// Track if we're exiting a table cell (must be after processing)
if (line.includes('</td>')) {
inTableCell = false;
}
}
content = lines.join('\n');
fs.writeFileSync(filePath, content, 'utf8');
}
done();
};
/**
 * Converts the .md files in DOCS_DIR to .mdx: applies a fixed list of text
 * rewrites to every file, renames it to .mdx and writes it back to DOCS_DIR.
 *
 * @returns {NodeJS.ReadWriteStream} The gulp stream, so gulp can detect
 *     completion.
 */
const convertToMdx = function() {
  // [pattern, replacement] pairs, applied in this order.
  const rewrites = [
    // Convert HTML comments to MDX comments
    [/<!--\s*([\s\S]*?)\s*-->/g, '{/* $1 */}'],
    // Fix malformed markdown links:
    // [text][/path](https://developers.google.com/path) -> [text](/path)
    [
      /\[([^\]]+)\]\[([^\]]+)\]\(https:\/\/developers\.google\.com([^)]+)\)/g,
      '[$1]($2)',
    ],
    // Fix all internal links: remove .md extension and convert ./filename to
    // /reference/js/filename
    [/\]\(\.\/([^)]+)\.md\)/g, '](/reference/js/$1)'],
    // Replace developers.google.com links with relative paths
    [/https:\/\/developers\.google\.com(\/blockly\/[^)\s"']+)/g, '$1'],
    // Replace developers.devsite.google.com links with relative paths
    [/https:\/\/developers\.devsite\.google\.com(\/blockly\/[^)\s"']+)/g, '$1'],
    // Fix underscore to hyphen in URL fragments
    [
      /(\/blockly\/[^)\s"'#]*#[^)\s"']*)_([^)\s"']*)/g,
      (matchedUrl) => matchedUrl.replace(/_/g, '-'),
    ],
    // Remove %5C (URL-encoded backslash) and literal backslash before anchor
    // tags
    [/(%5C|\\)(#[^)\s"']*)/g, '$2'],
  ];

  let stream = gulp.src(`${DOCS_DIR}/*.md`);
  for (const [pattern, replacement] of rewrites) {
    stream = stream.pipe(replace(pattern, replacement));
  }
  return stream
      .pipe(rename({ extname: '.mdx' }))
      .pipe(gulp.dest(DOCS_DIR));
};
/**
 * Removes every .md file from DOCS_DIR. Intended to run after the files have
 * been converted to .mdx.
 *
 * @param {Function} done Callback invoked once the files have been deleted.
 */
const cleanMdFiles = function(done) {
  fs.readdirSync(DOCS_DIR)
      .filter((entry) => entry.endsWith('.md'))
      .forEach((entry) => fs.unlinkSync(path.join(DOCS_DIR, entry)));
  done();
};
/**
* Creates a map of top-level pages to sub-pages, e.g. a mapping
* of `block_class` to every page associated with that class.
@@ -60,87 +369,176 @@ const prependBook = function() {
/**
 * Groups generated reference pages by their top-level page.
 *
 * Only `.mdx` files other than `blockly.mdx` are considered. The top-level name
 * is the second dot-separated segment of the file name:
 *   blockly.block_class.mdx                             -> key "block_class"
 *   blockly.block_class._constructor__1_constructor.mdx -> sub-page of it
 *
 * @param {string[]} allFiles File names found in the docs directory.
 * @returns {Map<string, string[]>} Map from top-level name to its sub-pages
 *     (file names without extension), constructors first, then sorted with
 *     `localeCompare`. Top-level pages with no sub-pages map to an empty array.
 */
const buildAlternatePathsMap = function(allFiles) {
  const map = new Map();
  for (const file of allFiles) {
    if (!file.endsWith('.mdx') || file === 'blockly.mdx') continue;

    // Strip only the trailing extension.
    const nameWithoutExt = file.slice(0, -'.mdx'.length);
    const parts = nameWithoutExt.split('.');
    // Names without a dot belong to no top-level page.
    if (parts.length < 2) continue;

    const topLevelName = parts[1];
    if (!map.has(topLevelName)) {
      map.set(topLevelName, []);
    }
    // Top-level pages (exactly two segments) only register the key; anything
    // deeper is recorded as a sub-page.
    if (parts.length > 2) {
      map.get(topLevelName).push(nameWithoutExt);
    }
  }

  // Sort sub-pages: constructors first, then alphabetically.
  for (const subPages of map.values()) {
    subPages.sort((a, b) => {
      const aIsConstructor = a.includes('._constructor');
      const bIsConstructor = b.includes('._constructor');
      if (aIsConstructor !== bIsConstructor) {
        return aIsConstructor ? -1 : 1;
      }
      return a.localeCompare(b);
    });
  }
  return map;
};
/**
* Create the _toc.yaml file used by devsite to create the leftnav.
* This file is generated from the contents of `blockly.md` which contains links
* to the other top-level API pages (each class, namespace, etc.).
*
* The `alternate_paths` for each top-level page contains the path for
* each associated sub-page. All subpages must be linked to their top-level page
* in the TOC for the left nav bar to remain correct after drilling down into a
* sub-page.
* Parse HTML tables from the blockly.md file to extract classes, interfaces, etc.
* @param {string} fileContent The content of blockly.md
* @returns {Object} Object with sections as keys and arrays of {name, path} as values
*/
const createToc = function(done) {
const fileContent = fs.readFileSync(`${DOCS_DIR}/blockly.md`, 'utf8');
// Create the TOC file. The file should not yet exist; if it does, this
// operation will fail.
const toc = fs.openSync(`${DOCS_DIR}/_toc.yaml`, 'ax');
const files = fs.readdirSync(DOCS_DIR);
const map = buildAlternatePathsMap(files);
const referencePath = '/blockly/reference/js';
const tocHeader = `toc:
- title: Overview
path: /blockly/reference/js/blockly.md\n`;
fs.writeSync(toc, tocHeader);
// Generate a section of TOC for each section/heading in the overview file.
const parseHtmlTables = function(fileContent) {
const result = {};
// Split by ## headings
const sections = fileContent.split('##');
for (let section of sections) {
// This converts the md table in each section to a JS object
const table = Extractor.extractObject(section, 'rows', false);
if (!table) {
continue;
const lines = section.split('\n');
const sectionName = lines[0].trim();
if (!sectionName || sectionName === 'blockly package') continue;
// Find table rows in HTML - match links with or without ./ prefix
const tableRowRegex = /<tr><td>\s*\[([^\]]+)\]\((?:\/reference\/js\/)?([^\)]+)\)/g;
const items = [];
let match;
while ((match = tableRowRegex.exec(section)) !== null) {
const name = match[1];
const href = match[2];
items.push({ name, path: href });
}
// Get the name of the section, i.e. the text immediately after the `##` in
// the source doc
const sectionName = section.split('\n')[0].trim();
fs.writeSync(toc, `- heading: ${sectionName}\n`);
for (let row in table) {
// After going through the Extractor, the markdown is now HTML.
// Each row in the table is now a link (anchor tag).
// Get the target of the link, excluding the first `.` since we don't want
// a relative path.
const path = /href="\.(.*?)"/.exec(row)?.[1];
// Get the name of the link (text in between the <a> and </a>)
const name = /">(.*?)</.exec(row)?.[1];
if (!path || !name) {
continue;
}
fs.writeSync(toc, `- title: ${name}\n path: ${referencePath}${path}\n`);
// Get the list of sub-pages for this page.
// Add each sub-page to the `alternate_paths` property.
let pages = map.get(path.split('.')[1]);
if (pages?.length) {
fs.writeSync(toc, ` alternate_paths:\n`);
for (let page of pages) {
fs.writeSync(toc, ` - ${referencePath}/${page}\n`);
}
}
if (items.length > 0) {
result[sectionName] = items;
}
}
return result;
}
/**
 * Creates the `_reference.js` file for the Docusaurus sidebar.
 *
 * The sidebar is generated from the contents of `blockly.mdx`, which contains
 * links to the other top-level API pages (each class, namespace, etc.). Every
 * section of that page becomes a category; a top-level page that has
 * sub-pages becomes a nested category linking to the page itself.
 *
 * The sidebar is built as plain objects and serialized with JSON.stringify,
 * so labels containing quotes or backslashes cannot produce an invalid module.
 *
 * @param {Function} done Callback invoked after the file has been written.
 */
const createReferenceSidebar = function(done) {
  const fileContent = fs.readFileSync(`${DOCS_DIR}/blockly.mdx`, 'utf8');
  const subPagesByTopLevel = buildAlternatePathsMap(fs.readdirSync(DOCS_DIR));
  // Parse HTML tables from the file.
  const sections = parseHtmlTables(fileContent);

  const docId = (page) => `reference/js/${page}`;

  // Builds the sidebar entry for one top-level page.
  const buildItem = (item, hideSubPages) => {
    const itemPath = item.path
        // A relative "./" prefix is not part of the doc id.
        .replace(/^\.\//, '')
        // Strip only a trailing extension; chained string replaces would turn
        // "x.mdx" into "xx" and could hit ".md" in the middle of a name.
        .replace(/\.mdx?$/, '');
    const baseName = itemPath.replace('blockly.', '');
    const subPages = subPagesByTopLevel.get(baseName);

    if (!subPages || subPages.length === 0) {
      // Simple item without sub-pages.
      return {type: 'doc', label: item.name, id: docId(itemPath)};
    }

    // Item with sub-pages: a category that links to the page itself.
    const category = {
      type: 'category',
      label: item.name,
      link: {type: 'doc', id: docId(itemPath)},
      items: subPages.map((subPage) => ({
        type: 'doc',
        label: subPage,
        id: docId(subPage),
      })),
    };
    if (hideSubPages) {
      category.className = 'hide-from-sidebar';
    }
    return category;
  };

  const sidebar = [
    {type: 'doc', label: 'Overview', id: 'reference/js/blockly'},
  ];
  // Process each section (Classes, Interfaces, Functions, etc.).
  for (const [sectionName, items] of Object.entries(sections)) {
    const hideSubPages =
        sectionName === 'Classes' || sectionName === 'Abstract Classes';
    sidebar.push({
      type: 'category',
      label: sectionName,
      collapsible: true,
      className: 'hide-level-3',
      items: items.map((item) => buildItem(item, hideSubPages)),
    });
  }

  const sidebarContent =
      `export const referenceSidebar = ${JSON.stringify(sidebar, null, 2)};\n`;

  // With `recursive: true`, mkdirSync does not fail when the directory
  // already exists, so no separate existence check is needed.
  fs.mkdirSync(REFERENCE_SIDEBAR_DIR, { recursive: true });
  fs.writeFileSync(`${REFERENCE_SIDEBAR_DIR}/_reference.js`, sidebarContent);
  done();
};
/**
 * Generates the API reference docs. The tasks are passed to `gulp.series`, so
 * they run one after another in the order listed: the markdown is generated,
 * converted to .mdx, the original .md files are removed, the .mdx files are
 * post-processed and given frontmatter, and finally the sidebar is created.
 */
export const docs = gulp.series(
    generateApiJson, removeRenames, generateDocs, convertToMdx, cleanMdFiles,
    fixMdxIssues, prependFrontmatter, createReferenceSidebar);