Refactoring implementation of wiki parse rules

And some documentation.
print-window-tiddler
Jeremy Ruston 2012-12-14 13:31:47 +00:00
rodzic 28f96de225
commit 31b283ef36
12 zmienionych plików z 223 dodań i 143 usunięć

Wyświetl plik

@ -12,33 +12,21 @@ Wiki text block rule for headings
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var HeadingRule = function(parser,startPos) { exports.name = "heading";
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /(!{1,6})/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
HeadingRule.prototype.findNextMatch = function(startPos) { exports.init = function() {
if(this.matchIndex !== undefined && startPos > this.matchIndex) { // Regexp to match
this.reMatch.lastIndex = startPos; this.matchRegExp = /(!{1,6})/mg;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
}; };
/* /*
Parse the most recent match Parse the most recent match
*/ */
HeadingRule.prototype.parse = function() { exports.parse = function() {
// Get all the details of the match // Get all the details of the match
var headingLevel = this.match[1].length; var headingLevel = this.match[1].length;
// Move past the !s // Move past the !s
this.parser.pos = this.reMatch.lastIndex; this.parser.pos = this.matchRegExp.lastIndex;
// Parse the heading // Parse the heading
var classedRun = this.parser.parseClassedRun(/(\r?\n)/mg); var classedRun = this.parser.parseClassedRun(/(\r?\n)/mg);
// Return the heading // Return the heading
@ -51,7 +39,4 @@ HeadingRule.prototype.parse = function() {
children: classedRun.tree children: classedRun.tree
}]; }];
}; };
exports.HeadingRule = HeadingRule;
})(); })();

Wyświetl plik

@ -46,23 +46,11 @@ A CSS class can be applied to a list item as follows:
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var ListRule = function(parser,startPos) { exports.name = "list";
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /([\\*#;:]+)/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
ListRule.prototype.findNextMatch = function(startPos) { exports.init = function() {
if(this.matchIndex !== undefined && startPos > this.matchIndex) { // Regexp to match
this.reMatch.lastIndex = startPos; this.matchRegExp = /([\\*#;:]+)/mg;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
}; };
var listTypes = { var listTypes = {
@ -75,7 +63,7 @@ var listTypes = {
/* /*
Parse the most recent match Parse the most recent match
*/ */
ListRule.prototype.parse = function() { exports.parse = function() {
// Array of parse tree nodes for the previous row of the list // Array of parse tree nodes for the previous row of the list
var listStack = []; var listStack = [];
// Cycle through the items in the list // Cycle through the items in the list
@ -136,6 +124,4 @@ ListRule.prototype.parse = function() {
return [listStack[0]]; return [listStack[0]];
}; };
exports.ListRule = ListRule;
})(); })();

Wyświetl plik

@ -18,34 +18,22 @@ definition text, including $param$ markers
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
exports.name = "macrodef";
/* /*
Instantiate parse rule Instantiate parse rule
*/ */
var MacroDefRule = function(parser,startPos) { exports.init = function() {
// Save state
this.parser = parser;
// Regexp to match // Regexp to match
this.reMatch = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg; this.matchRegExp = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
MacroDefRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
}; };
/* /*
Parse the most recent match Parse the most recent match
*/ */
MacroDefRule.prototype.parse = function() { exports.parse = function() {
// Move past the macro name and parameters // Move past the macro name and parameters
this.parser.pos = this.reMatch.lastIndex; this.parser.pos = this.matchRegExp.lastIndex;
// Parse the parameters // Parse the parameters
var paramString = this.match[2], var paramString = this.match[2],
params = []; params = [];
@ -93,6 +81,4 @@ MacroDefRule.prototype.parse = function() {
}; };
}; };
exports.MacroDefRule = MacroDefRule;
})(); })();

Wyświetl plik

@ -16,37 +16,23 @@ Wiki text run rule for HTML entities. For example:
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var EntityRule = function(parser,startPos) { exports.name = "entity";
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /(&#?[a-zA-Z0-9]{2,8};)/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
EntityRule.prototype.findNextMatch = function(startPos) { exports.init = function() {
if(this.matchIndex !== undefined && startPos > this.matchIndex) { // Regexp to match
this.reMatch.lastIndex = startPos; this.matchRegExp = /(&#?[a-zA-Z0-9]{2,8};)/mg;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
}; };
/* /*
Parse the most recent match Parse the most recent match
*/ */
EntityRule.prototype.parse = function() { exports.parse = function() {
// Get all the details of the match // Get all the details of the match
var entityString = this.match[1]; var entityString = this.match[1];
// Move past the macro call // Move past the macro call
this.parser.pos = this.reMatch.lastIndex; this.parser.pos = this.matchRegExp.lastIndex;
// Return the entity // Return the entity
return [{type: "entity", entity: this.match[0]}]; return [{type: "entity", entity: this.match[0]}];
}; };
exports.EntityRule = EntityRule;
})(); })();

Wyświetl plik

@ -23,38 +23,26 @@ This is a widget invocation
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
exports.name = "html";
var voidElements = "area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr".split(","); var voidElements = "area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr".split(",");
var HtmlRule = function(parser,startPos) { exports.init = function() {
// Save state
this.parser = parser;
// Regexp to match // Regexp to match
this.reMatch = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg; this.matchRegExp = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
HtmlRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
}; };
/* /*
Parse the most recent match Parse the most recent match
*/ */
HtmlRule.prototype.parse = function() { exports.parse = function() {
// Get all the details of the match in case this parser is called recursively // Get all the details of the match in case this parser is called recursively
var isWidget = !!this.match[1], var isWidget = !!this.match[1],
tagName = this.match[2], tagName = this.match[2],
attributeString = this.match[3], attributeString = this.match[3],
isSelfClosing = !!this.match[4]; isSelfClosing = !!this.match[4];
// Move past the tag name and parameters // Move past the tag name and parameters
this.parser.pos = this.reMatch.lastIndex; this.parser.pos = this.matchRegExp.lastIndex;
var reLineBreak = /(\r?\n)/mg, var reLineBreak = /(\r?\n)/mg,
reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|(\{\{[^\}]*\}\})|([^"'\s]+)))?/mg, reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|(\{\{[^\}]*\}\})|([^"'\s]+)))?/mg,
isBlock; isBlock;
@ -108,6 +96,4 @@ HtmlRule.prototype.parse = function() {
return [element]; return [element];
}; };
exports.HtmlRule = HtmlRule;
})(); })();

Wyświetl plik

@ -16,34 +16,22 @@ Wiki rule for macro calls
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var MacroCallRule = function(parser,startPos) { exports.name = "macrocall";
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /<<([^\s>]+)\s*([\s\S]*?)>>/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
MacroCallRule.prototype.findNextMatch = function(startPos) { exports.init = function() {
if(this.matchIndex !== undefined && startPos > this.matchIndex) { // Regexp to match
this.reMatch.lastIndex = startPos; this.matchRegExp = /<<([^\s>]+)\s*([\s\S]*?)>>/mg;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
}; };
/* /*
Parse the most recent match Parse the most recent match
*/ */
MacroCallRule.prototype.parse = function() { exports.parse = function() {
// Get all the details of the match // Get all the details of the match
var macroName = this.match[1], var macroName = this.match[1],
paramString = this.match[2]; paramString = this.match[2];
// Move past the macro call // Move past the macro call
this.parser.pos = this.reMatch.lastIndex; this.parser.pos = this.matchRegExp.lastIndex;
var params = [], var params = [],
reParam = /\s*(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^"'\s]+)))/mg, reParam = /\s*(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^"'\s]+)))/mg,
paramMatch = reParam.exec(paramString); paramMatch = reParam.exec(paramString);
@ -66,6 +54,4 @@ MacroCallRule.prototype.parse = function() {
}]; }];
}; };
exports.MacroCallRule = MacroCallRule;
})(); })();

Wyświetl plik

@ -0,0 +1,77 @@
/*\
title: $:/core/modules/parsers/wikiparser/rules/run/wikilink.js
type: application/javascript
module-type: wikirunrule
Wiki text run rule for wiki links. For example:
{{{
AWikiLink
AnotherLink
~SuppressedLink
}}}
Precede a camel case word with `~` to prevent it from being recognised as a link.
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Name of this rule; rule classes are stored in the vocabulary under this name
exports.name = "wikilink";
// Regular expression fragments from which the wiki link pattern is assembled
var textPrimitives = (function() {
	var upper = "[A-Z\u00c0-\u00de\u0150\u0170]",
		lower = "[a-z0-9_\\-\u00df-\u00ff\u0151\u0171]",
		any = "[A-Za-z0-9_\\-\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]",
		anyStrict = "[A-Za-z0-9\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]";
	return {
		upperLetter: upper,
		lowerLetter: lower,
		anyLetter: any,
		anyLetterStrict: anyStrict,
		// Prefix character that stops a word from being treated as a link
		unWikiLink: "~",
		// One or more upper, one or more lower, one upper, then any number of further letters
		wikiLink: [upper,"+",lower,"+",upper,any,"*"].join("")
	};
})();
exports.init = function() {
// Regexp to match
this.matchRegExp = new RegExp(textPrimitives.unWikiLink + "?" + textPrimitives.wikiLink,"mg");
};
/*
Parse the most recent match
*/
exports.parse = function() {
// Get the details of the match
var linkText = this.match[0];
// Move past the macro call
this.parser.pos = this.matchRegExp.lastIndex;
// If the link starts with the unwikilink character then just output it as plain text
if(linkText.substr(0,1) === textPrimitives.unWikiLink) {
return [{type: "text", text: linkText.substr(1)}];
}
// If the link has been preceded with a letter then don't treat it as a link
if(this.match.index > 0) {
var preRegExp = new RegExp(textPrimitives.anyLetterStrict,"mg");
preRegExp.lastIndex = this.match.index-1;
var preMatch = preRegExp.exec(this.parser.source);
if(preMatch && preMatch.index === this.match.index-1) {
return [{type: "text", text: linkText}];
}
}
return [{
type: "widget",
tag: "link",
attributes: {
to: {type: "string", value: linkText}
},
children: [{
type: "text",
text: linkText
}]
}];
};
})();

Wyświetl plik

@ -0,0 +1,35 @@
/*\
title: $:/core/modules/parsers/wikiparser/rules/wikirule.js
type: application/javascript
module-type: global
Base class for wiki parser rules
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Default properties for wiki parser rules
*/
var WikiRuleDefaultProperties = {
	/*
	To be overridden by individual rules
	*/
	init: function() {
	},
	/*
	Default implementation of findNextMatch uses RegExp matching. Searches the parser
	source from startPos using this.matchRegExp, stores the result in this.match and
	returns the index of the match, or undefined if there is none
	*/
	findNextMatch: function(startPos) {
		var regExp = this.matchRegExp;
		// Start the search at the requested position
		regExp.lastIndex = startPos;
		var found = regExp.exec(this.parser.source);
		this.match = found;
		return found ? found.index : undefined;
	}
};
// Export the default properties; referenced as $tw.WikiRuleDefaultProperties when rule classes are built
exports.WikiRuleDefaultProperties = WikiRuleDefaultProperties;
})();

Wyświetl plik

@ -38,12 +38,12 @@ var WikiParser = function(vocabulary,type,text,options) {
// Initialise the things that pragma rules can change // Initialise the things that pragma rules can change
this.macroDefinitions = {}; // Hash map of macro definitions this.macroDefinitions = {}; // Hash map of macro definitions
// Instantiate the pragma parse rules // Instantiate the pragma parse rules
this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRuleClasses,0); this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRules,0);
// Parse any pragmas // Parse any pragmas
this.parsePragmas(); this.parsePragmas();
// Instantiate the parser block and run rules // Instantiate the parser block and run rules
this.blockRules = this.instantiateRules(this.vocabulary.blockRuleClasses,this.pos); this.blockRules = this.instantiateRules(this.vocabulary.blockRules,this.pos);
this.runRules = this.instantiateRules(this.vocabulary.runRuleClasses,this.pos); this.runRules = this.instantiateRules(this.vocabulary.runRules,this.pos);
// Parse the text into runs or blocks // Parse the text into runs or blocks
if(this.type === "text/vnd.tiddlywiki-run") { if(this.type === "text/vnd.tiddlywiki-run") {
this.tree = this.parseRun(); this.tree = this.parseRun();
@ -56,17 +56,21 @@ var WikiParser = function(vocabulary,type,text,options) {
Instantiate an array of parse rules Instantiate an array of parse rules
*/ */
WikiParser.prototype.instantiateRules = function(classes,startPos) { WikiParser.prototype.instantiateRules = function(classes,startPos) {
var rules = [], var rulesInfo = [],
self = this; self = this;
$tw.utils.each(classes,function(RuleClass) { $tw.utils.each(classes,function(RuleClass) {
// Instantiate the rule // Instantiate the rule
var rule = new RuleClass(self,startPos); var rule = new RuleClass(self);
// Only save the rule if there is at least one match rule.init();
if(rule.matchIndex !== undefined) { var matchIndex = rule.findNextMatch(startPos);
rules.push(rule); if(matchIndex !== undefined) {
rulesInfo.push({
rule: rule,
matchIndex: matchIndex
});
} }
}); });
return rules; return rulesInfo;
}; };
/* /*
@ -87,16 +91,23 @@ WikiParser.prototype.skipWhitespace = function(options) {
Get the next match out of an array of parse rule instances Get the next match out of an array of parse rule instances
*/ */
WikiParser.prototype.findNextMatch = function(rules,startPos) { WikiParser.prototype.findNextMatch = function(rules,startPos) {
var nextMatch = undefined, // Find the best matching rule by finding the closest match position
nextMatchPos = this.sourceLength; var matchingRule = undefined,
matchingRulePos = this.sourceLength;
// Step through each rule
for(var t=0; t<rules.length; t++) { for(var t=0; t<rules.length; t++) {
var matchPos = rules[t].findNextMatch(startPos); var ruleInfo = rules[t];
if(matchPos !== undefined && matchPos <= nextMatchPos) { // Ask the rule to get the next match if we've moved past the current one
nextMatch = rules[t]; if(ruleInfo.matchIndex !== undefined && ruleInfo.matchIndex < startPos) {
nextMatchPos = matchPos; ruleInfo.matchIndex = ruleInfo.rule.findNextMatch(startPos);
}
// Adopt this match if it's closer than the current best match
if(ruleInfo.matchIndex !== undefined && ruleInfo.matchIndex <= matchingRulePos) {
matchingRule = ruleInfo;
matchingRulePos = ruleInfo.matchIndex;
} }
} }
return nextMatch; return matchingRule;
}; };
/* /*
@ -117,7 +128,7 @@ WikiParser.prototype.parsePragmas = function() {
return; return;
} }
// Process the pragma rule // Process the pragma rule
nextMatch.parse(); nextMatch.rule.parse();
} }
}; };
@ -134,7 +145,7 @@ WikiParser.prototype.parseBlock = function(terminatorRegExpString) {
// Look for a block rule that applies at the current position // Look for a block rule that applies at the current position
var nextMatch = this.findNextMatch(this.blockRules,this.pos); var nextMatch = this.findNextMatch(this.blockRules,this.pos);
if(nextMatch && nextMatch.matchIndex === this.pos) { if(nextMatch && nextMatch.matchIndex === this.pos) {
return nextMatch.parse(); return nextMatch.rule.parse();
} }
// Treat it as a paragraph if we didn't find a block rule // Treat it as a paragraph if we didn't find a block rule
return [{type: "element", tag: "p", children: this.parseRun(terminatorRegExp)}]; return [{type: "element", tag: "p", children: this.parseRun(terminatorRegExp)}];
@ -214,7 +225,7 @@ WikiParser.prototype.parseRunUnterminated = function() {
this.pos = nextMatch.matchIndex; this.pos = nextMatch.matchIndex;
} }
// Process the run rule // Process the run rule
tree.push.apply(tree,nextMatch.parse()); tree.push.apply(tree,nextMatch.rule.parse());
// Look for the next run rule // Look for the next run rule
nextMatch = this.findNextMatch(this.runRules,this.pos); nextMatch = this.findNextMatch(this.runRules,this.pos);
} }
@ -253,7 +264,7 @@ WikiParser.prototype.parseRunTerminated = function(terminatorRegExp) {
this.pos = runRuleMatch.matchIndex; this.pos = runRuleMatch.matchIndex;
} }
// Process the run rule // Process the run rule
tree.push.apply(tree,runRuleMatch.parse()); tree.push.apply(tree,runRuleMatch.rule.parse());
// Look for the next run rule // Look for the next run rule
runRuleMatch = this.findNextMatch(this.runRules,this.pos); runRuleMatch = this.findNextMatch(this.runRules,this.pos);
// Look for the next terminator match // Look for the next terminator match

Wyświetl plik

@ -13,15 +13,27 @@ module-type: global
var WikiVocabulary = function(options) { var WikiVocabulary = function(options) {
this.wiki = options.wiki; this.wiki = options.wiki;
// Hashmaps of the various parse rule classes // Hashmaps of the various parse rule classes
this.pragmaRuleClasses = $tw.modules.applyMethods("wikipragmarule"); this.pragmaRules = this.createRuleClasses("wikipragmarule");
this.blockRuleClasses = $tw.modules.applyMethods("wikiblockrule"); this.blockRules = this.createRuleClasses("wikiblockrule");
this.runRuleClasses = $tw.modules.applyMethods("wikirunrule"); this.runRules = this.createRuleClasses("wikirunrule");
// Hashmap of the various renderer classes // Hashmap of the various renderer classes
this.rendererClasses = $tw.modules.applyMethods("wikirenderer"); this.rendererClasses = $tw.modules.applyMethods("wikirenderer");
// Hashmap of the available widgets // Hashmap of the available widgets
this.widgetClasses = $tw.modules.applyMethods("widget"); this.widgetClasses = $tw.modules.applyMethods("widget");
}; };
/*
Build a hashmap of rule classes, keyed by rule name, from all the modules of the specified module type
*/
WikiVocabulary.prototype.createRuleClasses = function(moduleType) {
	var classesByName = {};
	// Create the class for a single rule module
	var makeRuleClass = function(ruleModule) {
		// Instances just record the parser that they belong to
		var ruleClass = function(parser) {
			this.parser = parser;
		};
		// Copy the default rule properties and then the exports of the module onto the prototype
		$tw.utils.extend(ruleClass.prototype,$tw.WikiRuleDefaultProperties,ruleModule);
		return ruleClass;
	};
	$tw.modules.forEachModuleOfType(moduleType,function(title,moduleExports) {
		var ruleClass = makeRuleClass(moduleExports);
		classesByName[moduleExports.name] = ruleClass;
	});
	return classesByName;
};
WikiVocabulary.prototype.parseText = function(type,text) { WikiVocabulary.prototype.parseText = function(type,text) {
return new $tw.WikiParser(this,type,text,{wiki: this.wiki}); return new $tw.WikiParser(this,type,text,{wiki: this.wiki});
}; };

Wyświetl plik

@ -0,0 +1,2 @@
title: WidgetModules

Wyświetl plik

@ -0,0 +1,28 @@
title: WikiRuleModules
WikiRuleModules cover the module types `wikirunrule`, `wikiblockrule` and `wikipragmarule`. Modules of these types encapsulate the logic of individual parsing rules used by the WikiParser engine. For example, there is a `wikirunrule` module that identifies references to HTML entities by matching the pattern `&<chars>;`.
Pragma rules are applied at the start of a block of text, and cover definitions and declarations that affect the parsing of the rest of the text. Block rules are only applied at the beginning of a block of wikitext, while run rules can appear anywhere. The only current example of a pragma rule is for macro definitions.
Examples of block rules:
* Headings
* Tables
* Lists
Examples of run rules:
* Entities
* HTML tags
* Wiki links
Parser rule modules build on the default properties in `$tw.WikiRuleDefaultProperties`. For each rule module a new rule class is created, and the default properties followed by the exports of the rule module are copied onto its prototype. In this way, the parser rule can override the default behaviour. In particular, the default properties incorporate logic for using regular expressions to match parse rules but this logic could be overridden by a parse rule that wanted to, say, use `indexOf()` instead of regular expressions.
The standard methods and properties of parser rules are as follows:
* `parser`: automatically generated property pointing back to the parser containing this rule
* `init()`: initialisation function called immediately after the constructor
* `findNextMatch(pos)`: returns the position of the next match at or after the specified position, or `undefined` if there are no further matches
* `parse()`: parses the most recent match, returning an array of the generated parse tree nodes. Pragma rules don't return parse tree nodes but instead modify the parser object directly (for example, to add local macro definitions)
The built-in parser rules use regular expression matching. Such rules can take advantage of the default implementation of `findNextMatch()` in `$tw.WikiRuleDefaultProperties` by ensuring that their `init()` method creates a `matchRegExp` property containing the regular expression to match. The `match` property contains the details of the match for use in the `parse()` method.