From beb7a14e28b2417a3c867e5f4ff4a48ea1dfa6e2 Mon Sep 17 00:00:00 2001 From: Greg Annandale Date: Tue, 31 Mar 2026 11:32:21 +0100 Subject: [PATCH] chore(docs): add Gulp tasks --- .../blockly/scripts/gulpfiles/docs_tasks.mjs | 548 +++++++++++++++--- 1 file changed, 473 insertions(+), 75 deletions(-) diff --git a/packages/blockly/scripts/gulpfiles/docs_tasks.mjs b/packages/blockly/scripts/gulpfiles/docs_tasks.mjs index 51abd480f..f80cbdce8 100644 --- a/packages/blockly/scripts/gulpfiles/docs_tasks.mjs +++ b/packages/blockly/scripts/gulpfiles/docs_tasks.mjs @@ -1,11 +1,12 @@ import {execSync} from 'child_process'; -import {Extractor} from 'markdown-tables-to-json'; import * as fs from 'fs'; +import * as path from 'path'; import * as gulp from 'gulp'; -import header from 'gulp-header'; import replace from 'gulp-replace'; +import rename from 'gulp-rename'; -const DOCS_DIR = 'docs'; +const DOCS_DIR = 'docs/docs/reference/js'; +const REFERENCE_SIDEBAR_DIR = 'docs/docs/reference'; /** * Run API Extractor to generate the intermediate json file. @@ -41,15 +42,323 @@ const generateDocs = function(done) { } /** - * Prepends the project and book metadata that devsite requires. + * Extracts the title from the H2 heading in the content. + * Falls back to filename-based title if H2 not found. */ -const prependBook = function() { - return gulp.src('docs/*.md') - .pipe(header( - 'Project: /blockly/_project.yaml\nBook: /blockly/_book.yaml\n\n')) +const extractTitleFromContent = function(content, filename) { + // Remove frontmatter if exists + let cleanContent = content.replace(/^---[\s\S]*?---\n\n/, ''); + + // Remove MDX comments + cleanContent = cleanContent.replace(/\{\/\*[\s\S]*?\*\/\}/g, ''); + + // Find the first ## heading + const headingMatch = cleanContent.match(/##\s+(.+)/); + if (headingMatch) { + // Get the full H2 heading text + let fullTitle = headingMatch[1].trim(); + // Remove markdown links + fullTitle = fullTitle.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); + // Remove inline code backticks + fullTitle = fullTitle.replace(/`([^`]+)`/g, '$1'); + + // Simplify title: "BlocklyOptions.comments property" -> "Comments property" + // Extract the last part after the last dot + const parts = fullTitle.split('.'); + if (parts.length > 1) { + // Get everything after the last dot + return parts[parts.length - 1]; + } + + return fullTitle; + } + + // Fallback to filename-based title + return extractTitle(filename); +}; + +/** + * Extracts a clean title from the filename. + * Example: "blockly.block_class" -> "Block class" + * Example: "blockly.block_class.addicon_1_method" -> "Addicon method" + */ +const extractTitle = function(filename) { + const nameWithoutExt = filename.replace('.mdx', '').replace('.md', ''); + const parts = nameWithoutExt.split('.'); + + if (parts.length === 2) { + // Top-level page: blockly.block_class -> "Block class" + let name = parts[1]; + // Remove suffixes like _class, _namespace, etc. + const suffix = name.match(/_(class|namespace|interface|enum|type|variable)$/); + name = name.replace(/_(class|namespace|interface|enum|type|variable)$/, ''); + + // Split by underscores and capitalize each word + const words = name.split('_').map(word => + word.charAt(0).toUpperCase() + word.slice(1) + ); + + // Add back the suffix with proper spacing + if (suffix) { + words.push(suffix[1]); + } + + return words.join(' '); + } else if (parts.length > 2) { + // Sub-page: blockly.block_class.addicon_1_method -> "Addicon method" + let name = parts[parts.length - 1]; + // Remove number suffixes and type suffixes + name = name.replace(/_\d+_(method|property|constructor|function|variable)$/, ' $1'); + name = name.replace(/^_constructor__\d+_constructor$/, 'Constructor'); + // Replace double underscores with space, but keep single underscores + name = name.replace(/__/g, ' '); + name = name.trim(); + // Capitalize first letter only + return name.charAt(0).toUpperCase() + name.slice(1); + } + + // Fallback: capitalize first letter + return nameWithoutExt.charAt(0).toUpperCase() + nameWithoutExt.slice(1); +}; + +/** + * Extracts description from the content. + * Gets the first paragraph after the heading, up to the first code block or newline. + * If no paragraph is found, generates a generic fallback description. + */ +const extractDescription = function(content, filename) { + // Remove frontmatter if exists + content = content.replace(/^---[\s\S]*?---\n\n/, ''); + + // Remove MDX comments + content = content.replace(/\{\/\*[\s\S]*?\*\/\}/g, ''); + + // Find the first ## heading (usually the main title) + const headingMatch = content.match(/##\s+(.+)/); + if (!headingMatch) { + const title = extractTitle(filename); + return `Blockly - usage reference for the ${title}`; + } + + // Get the full H2 heading for fallback description + let fullTitle = headingMatch[1].trim(); + fullTitle = fullTitle.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); + fullTitle = fullTitle.replace(/`([^`]+)`/g, '$1'); + + // Get content after the heading + const afterHeading = content.substring(content.indexOf(headingMatch[0]) + headingMatch[0].length); + + // Look for the first non-empty text after the heading + // It might have 1 or 2 newlines before the description paragraph + const paragraphMatch = afterHeading.match(/\n+([^\n]+(?:\n(?!\n|\*\*|```|##|)[^\n]+)*)/); + + if (paragraphMatch) { + // Clean up the description + let description = paragraphMatch[1].trim(); + + // Remove markdown links but keep the text + description = description.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); + + // Remove inline code backticks + description = description.replace(/`([^`]+)`/g, '$1'); + + // Remove extra whitespace and newlines + description = description.replace(/\s+/g, ' '); + + // Skip if it's empty after cleaning + if (!description) { + return `Blockly - usage reference for the ${fullTitle}`; + } + + // Limit to first sentence or 160 characters + const firstSentence = description.match(/^[^.!?]+[.!?]/); + if (firstSentence) { + description = firstSentence[0]; + } + + if (description.length > 160) { + description = description.substring(0, 157) + '...'; + } + + return description; + } + + // Fallback: Generate generic description using full H2 heading title + return `Blockly - usage reference for the ${fullTitle}`; +}; + +/** + * Prepends frontmatter to MDX files with title, description, and sidebar config. + */ +const prependFrontmatter = function(done) { + const files = fs.readdirSync(DOCS_DIR); + + for (const file of files) { + if (!file.endsWith('.mdx')) continue; + + const filePath = path.join(DOCS_DIR, file); + let content = fs.readFileSync(filePath, 'utf8'); + + // Remove existing frontmatter if present + if (content.startsWith('---\n')) { + const endOfFrontmatter = content.indexOf('---\n', 4); + if (endOfFrontmatter !== -1) { + content = content.substring(endOfFrontmatter + 4).trim() + '\n\n'; + } + } + + const title = extractTitleFromContent(content, file); + const description = extractDescription(content, file); + + let frontmatter = '---\n'; + frontmatter += 'sidebar: referenceSidebar\n'; + frontmatter += 'hide_title: true\n'; + frontmatter += `title: "${title}"\n`; + frontmatter += `description: ${JSON.stringify(description)}\n`; + frontmatter += '---\n\n'; + + // Write the file with frontmatter + fs.writeFileSync(filePath, frontmatter + content); + } + + done(); +}; + +/** + * Converts .md files to .mdx for Docusaurus. + */ +/** + * Post-process MDX files to fix problematic patterns + */ +const fixMdxIssues = function(done) { + const files = fs.readdirSync(DOCS_DIR).filter(f => f.endsWith('.mdx')); + + for (const file of files) { + const filePath = path.join(DOCS_DIR, file); + let content = fs.readFileSync(filePath, 'utf8'); + + // Split content into lines for line-by-line processing + const lines = content.split('\n'); + let inCodeBlock = false; + let inTableCell = false; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + + // Track code blocks + if (line.trim().startsWith('```')) { + inCodeBlock = !inCodeBlock; + continue; + } + + // Skip processing inside code blocks + if (inCodeBlock) continue; + + // Track if we're entering a table cell (opening tag without closing on same line) + if (line.includes('')) { + inTableCell = true; + } + + // Remove empty MDX comments + lines[i] = lines[i].replace(/\{\/\*\s*\*\/\}/g, ''); + + // Remove unnecessary markdown escapes for underscores and brackets + // These are not needed in MDX and can cause display issues + lines[i] = lines[i].replace(/\\_/g, '_'); + lines[i] = lines[i].replace(/\\\[/g, '['); + lines[i] = lines[i].replace(/\\\]/g, ']'); + + // Escape standalone HTML tags (not in tables or code) + if (!lines[i].includes('
') && !line.includes('
') && !lines[i].includes('
') && + !lines[i].includes('') && !lines[i].includes('') && + !lines[i].includes('') && !lines[i].includes('') && !lines[i].includes('')) { + lines[i] = lines[i].replace(/<([a-z]+)>/g, '`<$1>`'); + } + + // Handle curly braces ANYWHERE (in tables or outside) + // If the line has curly braces with type-like content, wrap in backticks + const trimmed = lines[i].trim(); + if (trimmed && (trimmed.includes('{') || trimmed.includes('\\{')) && + (trimmed.includes('}') || trimmed.includes('\\}'))) { + + // Skip if it's an MDX comment, table/code tag line, or already in backticks + if (trimmed.includes('{/*') || trimmed.includes('*/}') || + trimmed.includes('') || trimmed.includes('') || + trimmed.includes('') || trimmed.includes('') || + trimmed.startsWith('```') || trimmed.startsWith('`') && trimmed.endsWith('`')) { + // Process table cell content specifically + if (inTableCell && !lines[i].includes('')) { + // Remove any existing escaping first + let cleaned = trimmed.replace(/\\\{/g, '{').replace(/\\\}/g, '}'); + // Only remove trailing semicolons for nested object type patterns + if (cleaned.match(/\{\s*\[.*\]:\s*\{.*\};\s*\}/)) { + cleaned = cleaned.replace(/;\s*}/g, ' }'); + } + if (!cleaned.startsWith('`') || !cleaned.endsWith('`')) { + lines[i] = lines[i].replace(trimmed, '`' + cleaned + '`'); + } + } + } else { + // Not in a tag line - wrap curly brace content in backticks + let cleaned = trimmed.replace(/\\\{/g, '{').replace(/\\\}/g, '}'); + // Remove trailing semicolons for type patterns + if (cleaned.match(/\{\s*\[.*\]:\s*\{.*\};\s*\}/)) { + cleaned = cleaned.replace(/;\s*}/g, ' }'); + } + // Wrap in backticks + lines[i] = lines[i].replace(trimmed, '`' + cleaned + '`'); + } + } + + // Track if we're exiting a table cell (must be after processing) + if (line.includes('')) { + inTableCell = false; + } + } + + content = lines.join('\n'); + fs.writeFileSync(filePath, content, 'utf8'); + } + + done(); +}; + +const convertToMdx = function() { + return gulp.src(`${DOCS_DIR}/*.md`) + // Convert HTML comments to MDX comments + .pipe(replace(//g, '{/* $1 */}')) + // Fix malformed markdown links: [text][/path](https://developers.google.com/path) -> [text](/path) + .pipe(replace(/\[([^\]]+)\]\[([^\]]+)\]\(https:\/\/developers\.google\.com([^)]+)\)/g, '[$1]($2)')) + // Fix all internal links: remove .md extension and convert ./filename to /reference/js/filename + .pipe(replace(/\]\(\.\/([^)]+)\.md\)/g, '](/reference/js/$1)')) + // Replace developers.google.com links with relative paths + .pipe(replace(/https:\/\/developers\.google\.com(\/blockly\/[^)\s"']+)/g, '$1')) + // Replace developers.devsite.google.com links with relative paths + .pipe(replace(/https:\/\/developers\.devsite\.google\.com(\/blockly\/[^)\s"']+)/g, '$1')) + + // Fix underscore to hyphen in URL fragments + .pipe(replace(/(\/blockly\/[^)\s"'#]*#[^)\s"']*)_([^)\s"']*)/g, function(match) { + return match.replace(/_/g, '-'); + })) + // Remove %5C (URL-encoded backslash) and literal backslash before anchor tags + .pipe(replace(/(%5C|\\)(#[^)\s"']*)/g, '$2')) + .pipe(rename({ extname: '.mdx' })) .pipe(gulp.dest(DOCS_DIR)); } +/** + * Delete original .md files after conversion to .mdx + */ +const cleanMdFiles = function(done) { + const files = fs.readdirSync(DOCS_DIR); + for (const file of files) { + if (file.endsWith('.md')) { + fs.unlinkSync(path.join(DOCS_DIR, file)); + } + } + done(); +} + /** * Creates a map of top-level pages to sub-pages, e.g. a mapping * of `block_class` to every page associated with that class. @@ -60,87 +369,176 @@ const prependBook = function() { const buildAlternatePathsMap = function(allFiles) { let map = new Map(); for (let file of allFiles) { - // Get the name of the class/namespaces/variable/etc., i.e. the top-level - // page. - let filePieces = file.split('.'); - let name = filePieces[1]; - if (!map.has(name)) { - map.set(name, []); + if (!file.endsWith('.mdx') || file === 'blockly.mdx' || file === '_reference.js') continue; + + // Remove extension + const nameWithoutExt = file.replace('.mdx', ''); + + // Get the name of the class/namespace/etc., i.e. the top-level page + // Example: blockly.block_class._constructor__1.mdx -> block_class + // Example: blockly.block_class.mdx -> block_class + const parts = nameWithoutExt.split('.'); + + if (parts.length === 2) { + // This is a top-level page (e.g., blockly.block_class) + const topLevelName = parts[1]; + if (!map.has(topLevelName)) { + map.set(topLevelName, []); + } + } else if (parts.length > 2) { + // This is a sub-page (e.g., blockly.block_class._constructor__1_constructor) + const topLevelName = parts[1]; + if (!map.has(topLevelName)) { + map.set(topLevelName, []); + } + // Add the full name without extension + map.get(topLevelName).push(nameWithoutExt); } - if (filePieces[2] === 'md') { - // Don't add the top-level page to the map. - continue; - } - // Add all sub-pages to the array for the corresponding top-level page. - map.get(name).push(file); } + + // Sort sub-pages: constructors first, then alphabetically + for (const [key, value] of map.entries()) { + value.sort((a, b) => { + const aIsConstructor = a.includes('._constructor'); + const bIsConstructor = b.includes('._constructor'); + if (aIsConstructor && !bIsConstructor) return -1; + if (!aIsConstructor && bIsConstructor) return 1; + return a.localeCompare(b); + }); + } + return map; } /** - * Create the _toc.yaml file used by devsite to create the leftnav. - * This file is generated from the contents of `blockly.md` which contains links - * to the other top-level API pages (each class, namespace, etc.). - * - * The `alternate_paths` for each top-level page contains the path for - * each associated sub-page. All subpages must be linked to their top-level page - * in the TOC for the left nav bar to remain correct after drilling down into a - * sub-page. + * Parse HTML tables from the blockly.md file to extract classes, interfaces, etc. + * @param {string} fileContent The content of blockly.md + * @returns {Object} Object with sections as keys and arrays of {name, path} as values */ -const createToc = function(done) { - const fileContent = fs.readFileSync(`${DOCS_DIR}/blockly.md`, 'utf8'); - // Create the TOC file. The file should not yet exist; if it does, this - // operation will fail. - const toc = fs.openSync(`${DOCS_DIR}/_toc.yaml`, 'ax'); - const files = fs.readdirSync(DOCS_DIR); - const map = buildAlternatePathsMap(files); - const referencePath = '/blockly/reference/js'; - - const tocHeader = `toc: -- title: Overview - path: /blockly/reference/js/blockly.md\n`; - fs.writeSync(toc, tocHeader); - - // Generate a section of TOC for each section/heading in the overview file. +const parseHtmlTables = function(fileContent) { + const result = {}; + + // Split by ## headings const sections = fileContent.split('##'); + for (let section of sections) { - // This converts the md table in each section to a JS object - const table = Extractor.extractObject(section, 'rows', false); - if (!table) { - continue; + const lines = section.split('\n'); + const sectionName = lines[0].trim(); + + if (!sectionName || sectionName === 'blockly package') continue; + + // Find table rows in HTML - match links with or without ./ prefix + const tableRowRegex = /
') && !lines[i].includes('
\s*\[([^\]]+)\]\((?:\/reference\/js\/)?([^\)]+)\)/g; + const items = []; + + let match; + while ((match = tableRowRegex.exec(section)) !== null) { + const name = match[1]; + const href = match[2]; + items.push({ name, path: href }); } - // Get the name of the section, i.e. the text immediately after the `##` in - // the source doc - const sectionName = section.split('\n')[0].trim(); - fs.writeSync(toc, `- heading: ${sectionName}\n`); - for (let row in table) { - // After going through the Extractor, the markdown is now HTML. - // Each row in the table is now a link (anchor tag). - // Get the target of the link, excluding the first `.` since we don't want - // a relative path. - const path = /href="\.(.*?)"/.exec(row)?.[1]; - // Get the name of the link (text in between the and ) - const name = /">(.*?) 0) { + result[sectionName] = items; } } + + return result; +} +/** + * Create the _reference.js file for Docusaurus sidebar. + * This file is generated from the contents of `blockly.mdx` which contains links + * to the other top-level API pages (each class, namespace, etc.). + */ +const createReferenceSidebar = function(done) { + const fileContent = fs.readFileSync(`${DOCS_DIR}/blockly.mdx`, 'utf8'); + const files = fs.readdirSync(DOCS_DIR); + const map = buildAlternatePathsMap(files); + + // Parse HTML tables from the file + const sections = parseHtmlTables(fileContent); + + let sidebarContent = 'export const referenceSidebar = [\n'; + + // Add overview + sidebarContent += ' {\n'; + sidebarContent += ' "type": "doc",\n'; + sidebarContent += ' "label": "Overview",\n'; + sidebarContent += ' "id": "reference/js/blockly"\n'; + sidebarContent += ' },\n'; + + // Process each section (Classes, Interfaces, Functions, etc.) + for (const [sectionName, items] of Object.entries(sections)) { + sidebarContent += ' {\n'; + sidebarContent += ' "type": "category",\n'; + sidebarContent += ` "label": "${sectionName}",\n`; + sidebarContent += ' "collapsible": true,\n'; + sidebarContent += ' "className": "hide-level-3",\n'; + + sidebarContent += ' "items": [\n'; + + // Add items for this section + for (const item of items) { + const itemName = item.name; + const itemPath = item.path.replace('.md', '').replace('.mdx', ''); + const baseName = itemPath.replace('blockly.', ''); + + // Check if this item has sub-pages + const subPages = map.get(baseName); + + if (subPages && subPages.length > 0) { + // Item with sub-pages - create a category + sidebarContent += ' {\n'; + sidebarContent += ' "type": "category",\n'; + sidebarContent += ` "label": "${itemName}",\n`; + sidebarContent += ' "link": {\n'; + sidebarContent += ' "type": "doc",\n'; + sidebarContent += ` "id": "reference/js/${itemPath}"\n`; + sidebarContent += ' },\n'; + sidebarContent += ' "items": [\n'; + + // Add sub-pages + for (const subPage of subPages) { + const subPageId = subPage.replace('blockly.', ''); + sidebarContent += ' {\n'; + sidebarContent += ' "type": "doc",\n'; + sidebarContent += ` "label": "${subPage}",\n`; + sidebarContent += ` "id": "reference/js/${subPage}"\n`; + sidebarContent += ' },\n'; + } + + sidebarContent += ' ],\n'; + + if (sectionName === 'Classes' || sectionName === 'Abstract Classes') { + sidebarContent += ' "className": "hide-from-sidebar"\n'; + } + + sidebarContent += ' },\n'; + } else { + // Simple item without sub-pages + sidebarContent += ' {\n'; + sidebarContent += ' "type": "doc",\n'; + sidebarContent += ` "label": "${itemName}",\n`; + sidebarContent += ` "id": "reference/js/${itemPath}"\n`; + sidebarContent += ' },\n'; + } + } + + sidebarContent += ' ]\n'; + sidebarContent += ' },\n'; + } + + sidebarContent += '];\n'; + + // Write the file to the reference directory + if (!fs.existsSync(REFERENCE_SIDEBAR_DIR)) { + fs.mkdirSync(REFERENCE_SIDEBAR_DIR, { recursive: true }); + } + fs.writeFileSync(`${REFERENCE_SIDEBAR_DIR}/_reference.js`, sidebarContent); + done(); } export const docs = gulp.series( - generateApiJson, removeRenames, generateDocs, - gulp.parallel(prependBook, createToc)); - + generateApiJson, removeRenames, generateDocs, convertToMdx, cleanMdFiles, fixMdxIssues, prependFrontmatter, createReferenceSidebar);