/*
 * Converts TextMate language grammars (plist, parsed by lib.parsePlist) into
 * Ace mode / highlight-rules source files. Run directly it acts as a CLI
 * (see the usage string at the bottom); when required it exports
 * fetchAndConvert.
 */
require("amd-loader");
var fs = require("fs");
var util = require("util");
var lib = require("./lib");
var pathlib = require("path");
var parseLanguage = lib.parsePlist;
var tk = require("./regexp_tokenizer");
var tokenize = tk.tokenize;
var toStr = tk.toStr;

/** Returns the final element of `array` (undefined for an empty array). */
function last(array) {return array[array.length - 1]}

/**
 * Rewrites the `\h` / `\H` character-type tokens into plain text tokens
 * built from `\da-fA-F`.
 *
 * Tokens are mutated in place; the same array is returned.
 *
 * @param {Array} tokens token list as produced by tokenize()
 * @returns {Array} the same `tokens` array
 */
function convertHexEscape(tokens) {
    // Tracks whether the walk is currently between a "charclass" token and
    // its matching "charclass.end" token.
    var inChClass = false;
    tokens.forEach(function(t) {
        if (t.type == "charclass")
            inChClass = true;
        else if (t.type == "charclass.end")
            inChClass = false;
        else if (t.type == "charType"){
            if (t.value == "\\h") {
                t.type = "text";
                // Inside a class only the range list is emitted; outside, the
                // replacement is wrapped in its own brackets.
                t.value = inChClass ? "\\da-fA-F" : "[\\da-fA-F]";
            } else if (t.value == "\\H") {
                if (inChClass) {
                    // No replacement is attempted here; the token is left
                    // untouched and only a warning is printed.
                    console.warn("can't convert \\H in charclass");
                    return;
                }
                t.type = "text";
                t.value = "[^\\da-fA-F]";
            }
        }
    });
    return tokens;
}

/**
 * Replaces `\n` char tokens in a regex source with `$`.
 *
 * - `\n` not followed by a quantifier becomes `$`; every directly following
 *   `\n` or quantifier token is blanked.
 * - `\n` followed by a quantifier whose text contains `?`, `*`, `{,` or `{0,`
 *   is removed together with that quantifier.
 * - `\n` followed by any other quantifier keeps the `\n` token as is and only
 *   blanks the quantifier.
 *
 * Finally any run of `$` in the serialized result is collapsed to one `$`.
 *
 * @param {string} str regex source
 * @returns {string} rewritten regex source
 */
function convertNewLinesTo$(str) {
    var tokens = tokenize(str);
    for (var i = 0; i < tokens.length; i++) {
        var t= tokens[i];
        if (t.type == "char" && t.value == "\\n") {
            // `{}` stands in for "no next token" so property reads are safe.
            var p = tokens[i + 1] || {};
            if (p.type != "quantifier") {
                t.value = "$";
                // Advances the outer index `i` while swallowing followers.
                while (p.value == "\\n" || p.type == "quantifier") {
                    p.value = "";
                    p = tokens[++i + 1] || {};
                }
            } else if (/\?|\*|{,|{0,/.test(p.value)) {
                t.value = p.value = "";
            } else
                p.value = "";
        }
    }
    return toStr(tokens).replace(/[$]+/g, "$");
}

/**
 * Applies convertHexEscape and then strips a trailing `+` from every
 * quantifier token that is longer than one character (e.g. `++` -> `+`,
 * `*+` -> `*`).
 *
 * NOTE(review): the log message says the quantifier was converted "to *",
 * but the code only drops the final `+`; only the last affected quantifier
 * is reported. Callers pass a second argument which is ignored here.
 *
 * @param {string} str regex source
 * @returns {string} rewritten regex source
 */
function convertCharacterTypes(str) {
    var tokens = tokenize(str);
    tokens = convertHexEscape(tokens);
    // false, or the original text of the last quantifier that was rewritten.
    var warn = false;
    tokens.forEach(function(t){
        if (t.type == "quantifier") {
            var val = t.value;
            if (val.slice(-1) == "+" && val.length > 1) {
                t.value = val.slice(0, -1);
                warn = val;
            }
        }
    });
    if (warn)
        console.log("converted possesive quantifier " + warn + " to *");
    return toStr(tokens);
}

/**
 * Strips the characters `i`, `m`, `s`, `x` and `-` from every "group.start"
 * token whose value contains one of `i`, `m`, `s`, `x`. If the very next
 * token is a "group.end", both tokens are blanked.
 *
 * When any such group value contained an `i` and `rule` is truthy,
 * `rule.caseInsensitive` is set to true.
 *
 * @param {string} str regex source
 * @param {Object} [rule] rule object that receives the caseInsensitive flag
 * @returns {string} rewritten regex source
 */
function removeInlineFlags(str, rule) {
    var tokens = tokenize(str);
    var caseInsensitive = false;
    tokens.forEach(function(t, i) {
        if (t.type == "group.start" && /[imsx]/.test(t.value)) {
            if (/i/.test(t.value))
                caseInsensitive = true;
            t.value = t.value.replace(/[imsx\-]/g, "");
            var next = tokens[i + 1];
            if (next && next.type == "group.end") {
                t.value = next.value = "";
            }
        }
    });
    if (caseInsensitive && rule)
        rule.caseInsensitive = true;
    return toStr(tokens);
}

/**
 * Turns every plain capturing group opener `(` into `(?:`.
 *
 * @param {string} str regex source
 * @returns {string} rewritten regex source
 */
function convertToNonCapturingGroups(str) {
    var tokens = tokenize(str);
    tokens.forEach(function(t, i) {
        if (t.type == "group.start" && t.value == "(")
            t.value += "?:";
    });
    return toStr(tokens);
}

/**
 * Drops `(?:` ... `)` wrappers that are judged unnecessary:
 *
 * - a `(?:` group that starts at the first token and whose `end` is the last
 *   token is unwrapped;
 * - any other `(?:` group is unwrapped unless it is directly followed by a
 *   quantifier or the scan of its contents meets an "alternation" token.
 *
 * Logs "malformed regex" via console.error for a `(?:` token without `end`.
 *
 * @param {string} str regex source
 * @returns {string} rewritten regex source
 */
function simplifyNonCapturingGroups(str) {
    var tokens = tokenize(str);
    var t = tokens[0] || {};
    if (t.type == "group.start" && t.value == "(?:" && t.end == last(tokens)) {
        t.value = t.end.value = "";
    }
    // Shared cursor: iter() drives it, iterGroup() and the callback read it.
    var i = 0;
    function iter(f) {
        for (i = 0; i < tokens.length; i++)
            f(tokens[i]);
    }
    // Scans forward from the token after the shared cursor `i` until `end`
    // is reached. If `f` returns a number larger than the current index the
    // scan jumps to it. Returns the index at which the scan stopped.
    function iterGroup(end, f) {
        for (var i1 = i + 1; i1 < tokens.length; i1++) {
            var t = tokens[i1];
            if (t == end)
                break;
            var index = f && f(t);
            if (index > i1)
                i1 = index;
        }
        return i1;
    }
    iter(function (t) {
        if (t.type == "group.start" && t.value == "(?:") {
            if (!t.end)
                return console.error("malformed regex: " + str);
            var canRemove = true;
            var next = tokens[tokens.indexOf(t.end, i) + 1];
            // A quantified group must keep its parentheses.
            if (next && next.type == "quantifier")
                return;
            iterGroup(t.end, function(t) {
                if (t.type == "alternation")
                    canRemove = false;
                else if (t.type == "group.start" && t.end)
                    // Hands back the stop index of a nested scan so the
                    // outer scan can jump ahead.
                    return iterGroup(t.end);
            });
            if (canRemove)
                t.value = t.end.value = "";
        }
    });
    return toStr(tokens);
}

function removeLookBehinds(str) {
    var tokens = tokenize(str);
    var toRemove = null;
    tokens.forEach(function(t, i) {
        // NOTE(review): SOURCE is truncated on the next line. The text after
        // `&& /` is missing: the remainder of removeLookBehinds and the start
        // of the following definitions are not present in this copy. The
        // fragment `i) i = i1; }` appears to be the tail of a helper of a
        // later function - presumably fixGroups(captures, defaultName, regex),
        // which simpleRule() calls and whose locals (tokens, i, captures,
        // defaultName, isStart, tryOpen, tryClose, skip) are used below.
        // checkForNamedCaptures and convertBeginEndBackrefs, both called
        // elsewhere in this file, are not visible either. Restore this
        // region from the upstream file before relying on it.
        if (!toRemove && t.type == "group.start" && / i) i = i1; }
    // Returns the final element of `t`.
    function lst(t) {return t[t.length - 1]}
    // Calls `f` for every token, exposing the position through the shared `i`.
    function iter(f) {
        for (i = 0; i < tokens.length; i++)
            f(tokens[i]);
    }
    // Calls `f` for each token after the current cursor `i` up to (not
    // including) `end`.
    function iterGroup(end, f) {
        for (var i1 = i + 1; i1 < tokens.length; i1++) {
            var t = tokens[i1];
            if (t == end)
                break;
            f(t);
        }
    }
    // Token following the cursor, or `{}` when there is none.
    function peek() { return tokens[i + 1] || {}}

    // groupify
    // (tryOpen / tryClose / skip / isStart are defined in the missing region;
    // presumably they wrap bare token runs into groups - TODO confirm.)
    iter(function(t){
        if (t.type == "group.start") {
            tryClose();
            isStart = true;
            if (!t.hasChildren || t.isSpecial)
                skip(t);
        } else if (t.type == "group.end") {
            isStart = true;
            tryClose();
        } else if (t.type == "alternation") {
            isStart = true;
            tryClose();
        } else if (t.type != "anchor" && t.type != "quantifier"){
            tryOpen();
        }
    });
    tryClose();

    // remove redundant groups
    // `names` is a stack of token names; its top is the name inherited by
    // groups that have no capture name of their own.
    var names = [defaultName];
    iter(function(t){
        if (t.type == "group.start" && !t.isSpecial) {
            var captureName = captures[t.number];
            if (!t.hasChildren) {
                t.tokenName = captureName || lst(names);
                skip(t);
            } else {
                // Does any group.start inside this group have a capture name?
                var hasCapture = false;
                iterGroup(t.end, function(t1) {
                    if (t1.type == "group.start" && captures[t1.number])
                        hasCapture = true;
                });
                if (hasCapture) {
                    // The inner groups carry the names, so this one stops
                    // capturing; its own name (if any) becomes the default
                    // until the matching group.end pops it.
                    t.value = "(?:";
                    if (captureName) {
                        names.push(captureName);
                        t.isTokenGroup = true;
                    }
                } else {
                    // This group carries the name; plain "(" tokens inside
                    // it are made non-capturing.
                    t.tokenName = captureName || lst(names);
                    iterGroup(t.end, function(t1) {
                        if (t1.value == "(")
                            t1.value = "(?:";
                    });
                }
            }
        } else if (t.type == "group.end") {
            if (t.start.isTokenGroup)
                names.pop();
        }
    });

    // wrap capturing groups with quantifier
    // `(x)+` becomes `((?:x)+)`: the opener gains "(?:" and the quantifier
    // token gains the closing ")".
    iter(function(t){
        if (t.type == "group.end" && t.start.value == "(" && peek().type == "quantifier") {
            peek().value += ")";
            t.start.value += "(?:";
        }
    });

    // Collect the truthy token names of the remaining capturing openers, in
    // token order.
    names = [];
    tokens.forEach(function(t) {
        if (t.value == "(" || t.value == "((?:" )
            t.tokenName && names.push(t.tokenName);
    });
    return {
        names: names,
        regex: toStr(tokens)
    };
}

/***** converter */
/** Thin console.log wrapper; not called anywhere in the visible source. */
function logDebug(string, obj) {
    console.log(string, obj);
}

// tmLanguage processor
// for tracking token states
// Module-level accumulator: processRules/processRepository write into it and
// it is never reset, so states from successive conversions in one process
// accumulate in the same object.
var states = {start: []};

/**
 * Converts a parsed grammar into the shared `states` map.
 *
 * @param {Object} rules grammar object with optional `patterns` and
 *     `repository` properties
 * @returns {Object} the module-level `states` object
 */
function processRules(rules){
    if (rules.patterns)
        states.start = processPatterns(rules.patterns);
    if (rules.repository)
        processRepository(rules.repository);
    return states;
}

/**
 * Registers every repository entry as state "#<key>" in `states`.
 *
 * An entry with `patterns` (and neither `begin` nor a nested `repository`)
 * becomes the list of its converted patterns; every other entry becomes a
 * one-element list holding the converted entry itself.
 *
 * @param {Object} r repository object keyed by entry name
 */
function processRepository(r) {
    for (var key in r) {
        var p = r[key];
        // NOTE(review): the first and last branches are identical.
        if (p.begin)
            var stateObj = [processPattern(r[key])];
        else if (p.patterns && !p.repository)
            var stateObj = processPatterns(p.patterns);
        else
            var stateObj = [processPattern(r[key])];
        if (stateObj)
            states["#" + key] = stateObj;
    }
}

/** Converts each pattern of `pl` with processPattern and returns the array. */
function processPatterns(pl) {
    return pl.map(processPattern);
}

/**
 * Converts one TextMate pattern into a rule object.
 *
 * - `end == "(?!\G)"` with exactly one child pattern: the child is converted
 *   instead.
 * - `begin` + `end`: a begin rule whose `push` list holds the end rule
 *   (`next: "pop"`), the converted child patterns and, when the pattern has a
 *   `name` or `contentName`, a trailing `defaultToken` entry.
 * - `match`: a simple rule.
 * - `include`: `{include: ...}`.
 * - anything else: `{todo: p}`.
 *
 * A `comment` on the pattern is appended to the rule's comment, and a nested
 * `repository` is registered through processRepository.
 *
 * @param {Object} p TextMate pattern
 * @returns {Object} converted rule
 */
function processPattern(p) {
    if (p.end == "(?!\\G)" && p.patterns && p.patterns.length == 1) {
        var rule = processPattern(p.patterns[0]);
    } else if (p.begin != null && p.end != null) {
        // convertBeginEndBackrefs is not defined in the visible source
        // (see the truncation note in removeLookBehinds).
        convertBeginEndBackrefs(p);
        var rule = simpleRule(p.begin, p.name, p.beginCaptures || p.captures);
        var next = processPatterns(p.patterns || []);
        var endRule = simpleRule(p.end, p.name, p.endCaptures || p.captures);
        endRule.next = "pop";
        // The end rule goes last only when the grammar asks for it;
        // otherwise it is placed before the child patterns.
        if (p.applyEndPatternLast)
            next.push(endRule);
        else
            next.unshift(endRule);
        if (p.name || p.contentName)
            next.push({defaultToken: p.name || p.contentName});
        rule.push = next;
        rule = removeIncludeSelf(rule);
    } else if (p.match) {
        var rule = simpleRule(p.match, p.name, p.captures);
    } else if (p.include) {
        var rule = {include: p.include};
    } else {
        var rule = {todo: p};
    }
    if (p.comment)
        rule.comment = (rule.comment || "") + p.comment;
    if (p.repository)
        processRepository(p.repository);
    return rule;
}

/**
 * Builds a `{token, regex}` rule from a TextMate regex and its captures.
 *
 * The regex is first passed through transformRegExp. With captures, the
 * capture names are collected into an array indexed by capture key; if that
 * array has length 1 its only element becomes the token name, otherwise
 * fixGroups supplies the names and an adjusted regex. When the resulting
 * token is a single string all plain groups are made non-capturing. If the
 * final regex cannot be compiled by `new RegExp`, the rule is tagged with
 * `TODO` and `originalRegex` but is still returned.
 *
 * @param {string} regex TextMate regex source
 * @param {string} [name] token name; defaults to "text"
 * @param {Object} [captures] TextMate captures map
 * @returns {Object} rule with `token` (string or array) and `regex`
 */
function simpleRule(regex, name, captures) {
    name = name || "text";
    var rule = {token: "", regex: ""};
    var origRegex = regex;
    // transformRegExp may set rule.caseInsensitive as a side effect.
    regex = transformRegExp(origRegex, rule);
    if (captures) {
        var tokenArray = [];
        Object.keys(captures).forEach(function(x){
            tokenArray[x] = captures[x] && captures[x].name;
        });
        // Length 1 means the highest capture key was "0".
        if (tokenArray.length == 1) {
            name = tokenArray[0];
        } else {
            var fixed = fixGroups(tokenArray, name, regex);
            name = fixed.names;
            regex = fixed.regex;
            if (name.length == 1)
                name = name[0];
        }
    }
    if (typeof name == "string")
        regex = convertToNonCapturingGroups(regex);
    regex = simplifyNonCapturingGroups(regex);
    // Compile only to detect syntax that JavaScript regexes cannot express.
    try {new RegExp(regex);} catch(e) {
        rule.TODO = "FIXME: regexp doesn't have js equivalent";
        rule.originalRegex = origRegex;
        // lookbehinds are mostly used to force ordering
        // regex = removeLookBehinds(regex);
    }
    rule.token = name;
    rule.regex = regex;
    return rule;
}

/**
 * Handles `include: "$self"` entries inside a rule's `push` list.
 *
 * Returns the rule unchanged when it has no `push` list or no `$self`
 * include; otherwise warns and returns `{todo: rule}`.
 *
 * NOTE(review): everything after `return {todo: rule};` inside the
 * `hasSelfInclude` branch is unreachable, so `escapeRule` and
 * `complexSelfInclude` currently have no effect on the result.
 *
 * @param {Object} rule rule produced by processPattern
 * @returns {Object} `rule` itself or `{todo: rule}`
 */
function removeIncludeSelf(rule) {
    if (!rule.push)
        return rule;
    var hasSelfInclude = false;
    var escapeRule = null;
    var complexSelfInclude = false;
    rule.push.forEach(function(sub) {
        if (sub.include == "$self") {
            hasSelfInclude = true;
        } else if (sub.defaultToken) {
            return;
        } else if (sub.next == "pop") {
            escapeRule = sub;
        } else
            complexSelfInclude = true;
    });
    if (hasSelfInclude) {
        console.warn("can't convert include $self");
        return {todo: rule};
        // --- unreachable from here to the end of this branch ---
        if (complexSelfInclude) {
            console.warn("can't convert include $self");
            rule.toDo = "include $self not fully supported";
            return rule;
        }
        console.warn("include $self not fully supported");
        delete rule.push;
        delete escapeRule.next;
        rule.includeSelf = true;
        escapeRule.includeSelf = true;
        return [rule, escapeRule];
    }
    return rule;
}

// regex transformation
/**
 * Tokenizes `str` and serializes it again without touching the tokens.
 * NOTE(review): despite the name no flag handling happens here, and the
 * function is not called in the visible source.
 */
function removeXFlag(str) {
    var tokens = tokenize(str);
    return toStr(tokens);
}

/**
 * Runs the regex rewriting pipeline in order: convertNewLinesTo$,
 * removeInlineFlags, removal of the braces in `\x{...}` / `\u{...}` hex
 * escapes, convertCharacterTypes, then checkForNamedCaptures (whose return
 * value is ignored; its definition is not visible in this source).
 *
 * @param {string} str TextMate regex source
 * @param {Object} rule rule object; may receive `caseInsensitive`
 * @returns {string} rewritten regex source
 */
function transformRegExp(str, rule) {
    str = convertNewLinesTo$(str);
    str = removeInlineFlags(str, rule);
    str = str.replace(/(\\[xu]){([a-fA-F\d]+)}/g, '$1$2');
    str = convertCharacterTypes(str, rule);
    checkForNamedCaptures(str);
    return str;
}

// Entry point used by convertTmLanguage; simply delegates to processRules.
function extractPatterns(tmRules) {
    return processRules(tmRules);
}

/**
 * Builds a reference graph between states and prints every reference cycle
 * reachable from "start" with console.error (one comma-joined path per
 * line).
 *
 * Named states reference other states through string `next`/`push` values
 * and `include`; inline (array) `push`/`next` lists are registered as
 * anonymous states so they are scanned as well.
 *
 * @param {Object} states map of state name to rule list
 */
function detectLoops(states) {
    // state name -> {refs: [names of states it references]}
    var data = {};
    // Work list; anonymous state ids are appended while it is being walked.
    var keys = Object.keys(states);
    var flattenedStates = {};
    function addRef(item, name) {
        if (item.refs.indexOf(name) == -1)
            item.refs.push(name);
    }
    // Picks an id for an inline state: `name` itself if unused, otherwise
    // `name_0`, `name_1`, ... (the `next` argument is unused).
    function anonStateId(name, next) {
        var i = 0, old = name;
        while (flattenedStates[name] || states[name]) {
            name = old + "_" + i++;
        }
        // console.log(old, name)
        return name;
    }
    // Records `rules` under `key` unless already recorded; returns `rules`,
    // or the recorded list when `rules` is falsy.
    function addState(key, rules) {
        if (rules && !flattenedStates[key])
            flattenedStates[key] = rules;
        return rules || flattenedStates[key];
    }
    for (var i = 0; i < keys.length; i++) {
        var key = keys[i];
        var state = addState(key, states[key]);
        var item = data[key] || (data[key] = {/* name: key, */ refs: []});
        state.forEach(function(rule) {
            var next = rule.push || rule.next;
            if (next == "pop") {
                // nothing
            } else if (typeof next == "string") {
                addRef(item, next);
            } else if (next) {
                var anonId = anonStateId(key, next);
                addState(anonId, next);
                // Only a `push` adds an edge; an inline `next` is queued for
                // scanning but not linked from the current state.
                if (rule.push)
                    addRef(item, anonId);
                keys.push(anonId);
            } else if (rule.include) {
                addRef(item, rule.include);
            }
        });
    }
    var cycles = [];
    // Depth-first walk over `data`; `path` is the chain of state names from
    // "start" to the current node.
    function addPath(start, path) {
        var node = data[start];
        path.push(start);
        // States that were referenced but never defined get logged.
        if (!node || !node.refs)
            console.log(start);
        // First occurrence of `start`; if it is not the element just pushed,
        // the path has returned to an earlier state.
        var i = path.indexOf(start);
        if (i > -1 && i != path.length - 1 || start == "$self" || start == "$base") {
            if (i != -1)
                path = path.slice(i);
            // Skip paths already recorded (compared by string form).
            for (var j = 0; j < cycles.length; j++) {
                if (cycles[j] + "" == path + "")
                    return;
            }
            return cycles.push(path);
        }
        // Stop at leaves and cap the walk depth at 30.
        if (!node || !node.refs || !node.refs.length || path.length>30)
            return;
        node.refs.forEach(function(x) {
            // Each branch explores with its own copy of the path.
            addPath(x, path.concat());
        });
    }
    addPath("start", []);
    console.error(cycles.join("\n"));
}

/**
 * Smoke test for a generated mode file: requires it, instantiates its first
 * export and tokenizes the line "hello world". Any exception is logged, not
 * rethrown.
 *
 * @param {string} fileName path of the generated mode module
 */
function test(fileName) {
    console.log("testing highlighter");
    try {
        // Local `module` shadows the CommonJS `module` inside this function.
        var module = require(fileName);
        var Mode = module[Object.keys(module)[0]];
        var mode = new Mode();
        mode.getTokenizer().getLineTokens("hello world");
    } catch(e) {
        console.log(e);
    }
}

/**
 * Guesses the line-comment marker from the converted rules: `line` is set to
 * the regex of a rule whose string token contains the word "comment" (the
 * last such rule visited wins).
 *
 * NOTE(review): only `line` is ever set; `start` and `end` stay undefined,
 * so the caller's block-comment fallbacks always apply.
 *
 * @param {Object} patterns map of state name to rule list
 * @returns {Object} object with an optional `line` property
 */
function guessComment(patterns) {
    var comment = {};
    for (var i in patterns) {
        var state = patterns[i];
        state.forEach(function(r) {
            if (typeof r.token == "string") {
                if (/\bcomment\b/.test(r.token)) {
                    comment.line = r.regex;
                }
            }
        });
    }
    return comment;
}

// cli stuff
// Both templates are read synchronously when the module is loaded.
var modeTemplate = fs.readFileSync(__dirname + "/templates/mode.js", "utf8");
var modeHighlightTemplate = fs.readFileSync(__dirname + "/templates/highlight_rules.js", "utf8");

/**
 * Loads a grammar from a URL or a file path and converts it.
 *
 * Names starting with "http" are downloaded through lib.download; github.com
 * URLs are first rewritten to raw.github.com with the "/blob/" segment
 * removed. Other names are read from disk: used as given when they start
 * with "/" or a `<word char>:` prefix, otherwise resolved against the current
 * working directory.
 *
 * @param {string} name URL or file path of the grammar
 * @returns {*} whatever lib.download returns for URLs, otherwise undefined
 */
function fetchAndConvert(name) {
    console.log("Converting " + name);
    if (/^http/.test(name)) {
        if (/:\/\/github.com/.test(name)) {
            name = name.replace(/\/blob\//, "/").replace("github.com", "raw.github.com");
        }
        return lib.download(name, function(data) {
            convertTmLanguage(name, data);
        });
    }
    var path = /^(\/|\w:)/.test(name) ? name : process.cwd() + "/" + name;
    var langStr = fs.readFileSync(path, "utf8");
    convertTmLanguage(name, langStr);
}

/**
 * Parses grammar text and produces the Ace mode and highlight-rules sources.
 *
 * In dev mode the generated sources are only printed; otherwise both files
 * are written below `lib.AceLib + "ace/mode/"` and the mode file is passed to
 * test().
 *
 * @param {string} name original path or URL (forwarded to the template)
 * @param {string} langStr grammar text handed to parseLanguage
 */
function convertTmLanguage(name, langStr) {
    parseLanguage(langStr, function(language) {
        var highlighterFilename = lib.snakeCase(language.name).replace(/[^\w]/g, "");
        var languageNameSanitized = lib.camelCase(language.name).replace(/[^\w]/g, "");
        require("./add_mode")(languageNameSanitized, (language.fileTypes || []).join("|"));
        var highlighterFile = pathlib.normalize(lib.AceLib + "ace/mode/" + highlighterFilename + "_highlight_rules.js");
        var modeFile = pathlib.normalize(lib.AceLib + "ace/mode/" + highlighterFilename + ".js");
        // `devMode` is the `var` declared in the CLI block at the bottom of
        // the file; it is undefined when this module is loaded via require.
        if (devMode) {
            console.log(util.inspect(language.patterns, false, 4));
            console.log(util.inspect(language.repository, false, 4));
        }
        var patterns = extractPatterns(language);
        detectLoops(patterns);
        // var uuid = language.uuid
        // The three keys are removed so that only the remaining metadata is
        // serialized into `metaData` below.
        delete language.uuid;
        delete language.patterns;
        delete language.repository;
        var comment = guessComment(patterns);
        var languageMode = lib.fillTemplate(modeTemplate, {
            language: languageNameSanitized,
            languageHighlightFilename: highlighterFilename,
            lineCommentStart: JSON.stringify(comment.line || "//"),
            blockCommentStart: JSON.stringify(comment.start || "/*"),
            blockCommentEnd: JSON.stringify(comment.end || "*/")
        });
        var languageHighlightRules = lib.fillTemplate(modeHighlightTemplate, {
            language: languageNameSanitized,
            languageTokens: lib.formatJS(patterns, " ").trim(),
            // NOTE(review): language.uuid was deleted above, so this value
            // is always undefined.
            uuid: language.uuid,
            name: name,
            metaData: lib.formatJS(language, "").trim()
        });
        if (devMode) {
            console.log(languageMode);
            console.log(languageHighlightRules);
            console.log("Not writing, 'cause we're in dev mode, baby.");
        } else {
            fs.writeFileSync(highlighterFile, languageHighlightRules);
            fs.writeFileSync(modeFile, languageMode);
            console.log("created file " + highlighterFile);
            test(modeFile);
        }
    });
}

// Run as a script: convert every argument; an optional leading "--dev"
// switches to print-only mode. Loaded as a module: export the entry point.
if (!module.parent) {
    // splice(2) removes and returns everything after the node binary and
    // the script path.
    var args = process.argv.splice(2);
    var devMode = args[0] == "--dev";
    if (devMode)
        args.shift();
    if (args.length < 1) {
        console.error("Usage: node tmlanguage.js [--dev] path/or/url/to/syntax.file ...");
        process.exit(1);
    }
    args.forEach(fetchAndConvert);
} else {
    exports.fetchAndConvert = fetchAndConvert;
}