1 /* Copyright (c) 2006, Ivan Sagalaev
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the name of highlight.js nor the names of its contributors
12 may be used to endorse or promote products derived from this software
13 without specific prior written permission.
15 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 var hljs
= new function() {
29 /* Utility functions */
/*
Escapes the characters that are significant in HTML text so that `value`
can be embedded in markup verbatim.

The ampersand is replaced first: doing it later would re-escape the `&`
that the `&lt;` and `&gt;` replacements introduce.

@param {string} value  Raw text.
@returns {string} `value` with `&`, `<` and `>` replaced by `&amp;`,
  `&lt;` and `&gt;` respectively.
*/
function escape(value) {
  return value
    .replace(/&/gm, '&amp;')
    .replace(/</gm, '&lt;')
    .replace(/>/gm, '&gt;');
}
36 return node
.nodeName
.toLowerCase();
/*
Tests whether `re` matches `lexeme` starting at its very first character.

@param {RegExp} [re]  Expression to run; may be missing.
@param {string} lexeme  Text to test.
@returns {boolean|null|undefined} `true` when the match starts at index 0,
  `false` when it starts later. When `re` is falsy or nothing matches, the
  falsy value itself (`re`, or the `null` returned by `exec`) is passed
  through, so callers should only rely on truthiness.
*/
function testRe(re, lexeme) {
  var match = re && re.exec(lexeme);
  return match && match.index === 0;
}
/*
Determines which language a code block asks for through its CSS classes.

The class names of the block and, when it has one, of its parent node are
examined. An optional `lang-` or `language-` prefix is stripped from each.

@param {Object} block  Node-like object with `className` and, optionally,
  `parentNode`.
@returns {string|undefined} The first class that getLanguage() recognises or
  that is literally `no-highlight`; `undefined` when no class qualifies.
*/
function blockLanguage(block) {
  var parentClasses = block.parentNode ? block.parentNode.className : '';
  var classes = (block.className + ' ' + parentClasses).split(/\s+/);
  classes = classes.map(function(c) {
    return c.replace(/^lang(uage)?-/, '');
  });
  return classes.filter(function(c) {
    return getLanguage(c) || c === 'no-highlight';
  })[0];
}
50 function inherit(parent
, obj
) {
52 for (var key
in parent
)
53 result
[key] = parent
[key];
56 result
[key] = obj
[key];
62 function nodeStream(node
) {
64 (function _nodeStream(node
, offset
) {
65 for (var child
= node
.firstChild
; child
; child
= child
.nextSibling
) {
66 if (child
.nodeType
== 3)
67 offset
+= child
.nodeValue
.length
;
68 else if (tag(child
) == 'br')
70 else if (child
.nodeType
== 1) {
76 offset
= _nodeStream(child
, offset
);
89 function mergeStreams(original
, highlighted
, value
) {
/*
Picks the event stream whose next event has to be processed first.

Reads `original` and `highlighted` from the enclosing scope; each is a list
of events carrying an `offset` and an `event` name.

@returns {Array} Either `original` or `highlighted`.
*/
function selectStream() {
  // When either stream is exhausted, whatever is left in the other one wins.
  if (!original.length || !highlighted.length) {
    return original.length ? original : highlighted;
  }
  // Otherwise the event positioned earlier in the text goes first.
  if (original[0].offset != highlighted[0].offset) {
    return (original[0].offset < highlighted[0].offset) ? original : highlighted;
  }

  /*
  Both streams have an event at the same offset. To avoid starting a stream
  just before it should stop, the order is arranged so that original always
  starts first and closes last: original goes first when highlighted is
  about to start a node, and highlighted goes first otherwise.
  */
  return highlighted[0].event == 'start' ? original : highlighted;
}
/*
Appends the opening tag of `node`, with all of its attributes, to the
`result` string of the enclosing scope.

@param {Object} node  Element-like object; its tag name is obtained through
  tag() and its attributes through `node.attributes`.
*/
function open(node) {
  // Serialises one attribute as ` name="value"`. The value is placed between
  // double quotes, so a literal `"` is escaped in addition to what escape()
  // handles; otherwise it would end the attribute early.
  function attr_str(a) {
    return ' ' + a.nodeName + '="' + escape(a.value).replace(/"/g, '&quot;') + '"';
  }
  // `attributes` is array-like rather than a real array, hence the borrowed map().
  result += '<' + tag(node) + Array.prototype.map.call(node.attributes, attr_str).join('') + '>';
}
/*
Appends the closing tag of `node` to the `result` string of the enclosing
scope. The tag name is obtained through tag().
*/
function close(node) {
  result += '</' + tag(node) + '>';
}
/*
Renders a single stream event: a 'start' event opens the tag of the event's
node, any other event closes it.

@param {Object} event  Stream event with `event` and `node` properties.
*/
function render(event) {
  var emit = event.event == 'start' ? open : close;
  emit(event.node);
}
133 while (original
.length || highlighted
.length
) {
134 var stream
= selectStream();
135 result
+= escape(value
.substr(processed
, stream
[0].offset
- processed
));
136 processed
= stream
[0].offset
;
137 if (stream
== original
) {
139 On any opening or closing tag of the original markup we first close
140 the entire highlighted node stack, then render the original tag along
141 with all the following original tags at the same offset and then
142 reopen all the tags on the highlighted stack.
144 nodeStack
.reverse().forEach(close
);
146 render(stream
.splice(0, 1)[0]);
147 stream
= selectStream();
148 } while (stream
== original
&& stream
.length
&& stream
[0].offset
== processed
);
149 nodeStack
.reverse().forEach(open
);
151 if (stream
[0].event
== 'start') {
152 nodeStack
.push(stream
[0].node
);
156 render(stream
.splice(0, 1)[0]);
159 return result
+ escape(value
.substr(processed
));
164 function compileLanguage(language
) {
167 return (re
&& re
.source
) || re
;
170 function langRe(value
, global
) {
173 'm' + (language
.case_insensitive ?
'i' : '') + (global ?
'g' : '')
177 function compileMode(mode
, parent
) {
180 mode
.compiled
= true;
182 mode
.keywords
= mode
.keywords || mode
.beginKeywords
;
184 var compiled_keywords
= {};
/*
Records every keyword of a space-separated keyword string in
`compiled_keywords` (enclosing scope) as `[className, relevance]`.

A keyword may carry its own relevance after a `|` (for example `else|2`);
without one the relevance is 1. For case-insensitive languages the whole
string is lower-cased first.

@param {string} className  Class to associate with each keyword.
@param {string} str  Space-separated keywords.
*/
function flatten(className, str) {
  if (language.case_insensitive) {
    str = str.toLowerCase();
  }
  str.split(' ').forEach(function(kw) {
    var pair = kw.split('|');
    compiled_keywords[pair[0]] = [className, pair[1] ? Number(pair[1]) : 1];
  });
}
196 if (typeof mode
.keywords
== 'string') { // string
197 flatten('keyword', mode
.keywords
);
199 Object
.keys(mode
.keywords
).forEach(function (className
) {
200 flatten(className
, mode
.keywords
[className]);
203 mode
.keywords
= compiled_keywords
;
205 mode
.lexemesRe
= langRe(mode
.lexemes ||
/\b[A
-Za
-z0
-9_
]+\b/, true);
208 if (mode
.beginKeywords
) {
209 mode
.begin
= '\\b(' + mode
.beginKeywords
.split(' ').join('|') + ')\\b';
212 mode
.begin
= /\B|
\b/;
213 mode
.beginRe
= langRe(mode
.begin
);
214 if (!mode
.end
&& !mode
.endsWithParent
)
217 mode
.endRe
= langRe(mode
.end
);
218 mode
.terminator_end
= reStr(mode
.end
) ||
'';
219 if (mode
.endsWithParent
&& parent
.terminator_end
)
220 mode
.terminator_end
+= (mode
.end ?
'|' : '') + parent
.terminator_end
;
223 mode
.illegalRe
= langRe(mode
.illegal
);
224 if (mode
.relevance
=== undefined
)
226 if (!mode
.contains
) {
229 var expanded_contains
= [];
230 mode
.contains
.forEach(function(c
) {
232 c
.variants
.forEach(function(v
) {expanded_contains
.push(inherit(c
, v
));});
234 expanded_contains
.push(c
== 'self' ? mode
: c
);
237 mode
.contains
= expanded_contains
;
238 mode
.contains
.forEach(function(c
) {compileMode(c
, mode
);});
241 compileMode(mode
.starts
, parent
);
245 mode
.contains
.map(function(c
) {
246 return c
.beginKeywords ?
'\\.?(' + c
.begin
+ ')\\.?' : c
.begin
;
248 .concat([mode
.terminator_end
, mode
.illegal
])
251 mode
.terminators
= terminators
.length ?
langRe(terminators
.join('|'), true) : {exec
: function(s
) {return null;}};
253 mode
.continuation
= {};
256 compileMode(language
);
260 Core highlighting function. Accepts a language name, or an alias, and a
261 string with the code to highlight. Returns an object with the following
265 - value (an HTML string with highlighting markup)
268 function highlight(name
, value
, ignore_illegals
, continuation
) {
/*
Finds the first mode contained in `mode` whose begin expression matches
`lexeme` from its first character.

@param {string} lexeme  Text to test.
@param {Object} mode  Mode whose `contains` list is searched in order.
@returns {Object|undefined} The matching contained mode, or `undefined`
  when none matches.
*/
function subMode(lexeme, mode) {
  for (var i = 0; i < mode.contains.length; i++) {
    if (testRe(mode.contains[i].beginRe, lexeme)) {
      return mode.contains[i];
    }
  }
}
278 function endOfMode(mode
, lexeme
) {
279 if (testRe(mode
.endRe
, lexeme
)) {
282 if (mode
.endsWithParent
) {
283 return endOfMode(mode
.parent
, lexeme
);
/*
Tells whether `lexeme` is illegal in `mode`. Always falsy while the
enclosing highlight() call runs with `ignore_illegals` set.

@param {string} lexeme  Text to test.
@param {Object} mode  Mode providing `illegalRe`.
@returns {*} Truthy when illegal lexemes are not being ignored and
  `mode.illegalRe` matches `lexeme` from its first character.
*/
function isIllegal(lexeme, mode) {
  return !ignore_illegals && testRe(mode.illegalRe, lexeme);
}
/*
Looks up a lexeme match in the keyword table of `mode`.

@param {Object} mode  Mode providing the `keywords` table.
@param {Array} match  Regular-expression match; `match[0]` is the lexeme.
@returns {*} The table entry stored for the lexeme, or `false` when the
  lexeme is not a keyword. The lexeme is lower-cased first for
  case-insensitive languages.
*/
function keywordMatch(mode, match) {
  var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0];
  // hasOwnProperty is borrowed from Object.prototype: the table is keyed by
  // arbitrary keywords, one of which may itself be named `hasOwnProperty`.
  return Object.prototype.hasOwnProperty.call(mode.keywords, match_str) && mode.keywords[match_str];
}
/*
Wraps markup in a `<span>` carrying the given class.

@param {string} classname  Class name, without prefix.
@param {string} insideSpan  Markup placed inside the span.
@param {boolean} [leaveOpen]  When truthy the closing `</span>` is omitted.
@param {boolean} [noPrefix]  When truthy `options.classPrefix` is not
  prepended to the class name.
@returns {string} The assembled markup.
*/
function buildSpan(classname, insideSpan, leaveOpen, noPrefix) {
  var classPrefix = noPrefix ? '' : options.classPrefix;
  var openSpan = '<span class="' + classPrefix + classname + '">';
  var closeSpan = leaveOpen ? '' : '</span>';

  return openSpan + insideSpan + closeSpan;
}
306 function processKeywords() {
308 return escape(mode_buffer
);
311 top
.lexemesRe
.lastIndex
= 0;
312 var match
= top
.lexemesRe
.exec(mode_buffer
);
314 result
+= escape(mode_buffer
.substr(last_index
, match
.index
- last_index
));
315 var keyword_match
= keywordMatch(top
, match
);
317 relevance
+= keyword_match
[1];
318 result
+= buildSpan(keyword_match
[0], escape(match
[0]));
320 result
+= escape(match
[0]);
322 last_index
= top
.lexemesRe
.lastIndex
;
323 match
= top
.lexemesRe
.exec(mode_buffer
);
325 return result
+ escape(mode_buffer
.substr(last_index
));
/*
Highlights the buffered text (`mode_buffer`) as an embedded language of the
current mode (`top`), both taken from the enclosing scope.

- A named sub-language that is not registered: the buffer is returned
  escaped and unhighlighted.
- A named, registered sub-language: the buffer is highlighted with it,
  ignoring illegals and resuming from `top.continuation.top`.
- No name (falsy `subLanguage`): the language is auto-detected.

@returns {string} Escaped text, or the highlighted markup wrapped in a span
  whose class is the language name without the class prefix.
*/
function processSubLanguage() {
  if (top.subLanguage && !languages[top.subLanguage]) {
    return escape(mode_buffer);
  }
  var result = top.subLanguage
    ? highlight(top.subLanguage, mode_buffer, true, top.continuation.top)
    : highlightAuto(mode_buffer);
  // Counting the embedded language's score towards the host language can be
  // disabled by zeroing the containing mode's relevance. A use case is
  // Markdown, which allows XML everywhere: without this every XML snippet
  // would add to the Markdown relevance.
  if (top.relevance > 0) {
    relevance += result.relevance;
  }
  // In 'continuous' mode the state reached by this run is stored so that the
  // next run of the same sub-language can be started from it.
  if (top.subLanguageMode == 'continuous') {
    top.continuation.top = result.top;
  }
  return buildSpan(result.language, result.value, false, true);
}
/*
Turns the buffered text into markup: through sub-language highlighting when
the current mode (`top`) defines `subLanguage` at all (even as an empty
value), through keyword processing otherwise.

@returns {string} Markup for the buffered text.
*/
function processBuffer() {
  if (top.subLanguage !== undefined) {
    return processSubLanguage();
  }
  return processKeywords();
}
350 function startNewMode(mode
, lexeme
) {
351 var markup
= mode
.className?
buildSpan(mode
.className
, '', true): '';
352 if (mode
.returnBegin
) {
355 } else if (mode
.excludeBegin
) {
356 result
+= escape(lexeme
) + markup
;
360 mode_buffer
= lexeme
;
362 top
= Object
.create(mode
, {parent
: {value
: top
}});
365 function processLexeme(buffer
, lexeme
) {
367 mode_buffer
+= buffer
;
368 if (lexeme
=== undefined
) {
369 result
+= processBuffer();
373 var new_mode
= subMode(lexeme
, top
);
375 result
+= processBuffer();
376 startNewMode(new_mode
, lexeme
);
377 return new_mode
.returnBegin ?
0 : lexeme
.length
;
380 var end_mode
= endOfMode(top
, lexeme
);
383 if (!(origin
.returnEnd || origin
.excludeEnd
)) {
384 mode_buffer
+= lexeme
;
386 result
+= processBuffer();
391 relevance
+= top
.relevance
;
393 } while (top
!= end_mode
.parent
);
394 if (origin
.excludeEnd
) {
395 result
+= escape(lexeme
);
398 if (end_mode
.starts
) {
399 startNewMode(end_mode
.starts
, '');
401 return origin
.returnEnd ?
0 : lexeme
.length
;
404 if (isIllegal(lexeme
, top
))
405 throw new Error('Illegal lexeme "' + lexeme
+ '" for mode "' + (top
.className ||
'<unnamed>') + '"');
408 Parser should not reach this point as all types of lexemes should be caught
409 earlier, but if it does due to some bug make sure it advances at least one
410 character forward to prevent infinite looping.
412 mode_buffer
+= lexeme
;
413 return lexeme
.length ||
1;
416 var language
= getLanguage(name
);
418 throw new Error('Unknown language: "' + name
+ '"');
421 compileLanguage(language
);
422 var top
= continuation || language
;
424 for(var current
= top
; current
!= language
; current
= current
.parent
) {
425 if (current
.className
) {
426 result
+= buildSpan(current
.className
, result
, true);
429 var mode_buffer
= '';
432 var match
, count
, index
= 0;
434 top
.terminators
.lastIndex
= index
;
435 match
= top
.terminators
.exec(value
);
438 count
= processLexeme(value
.substr(index
, match
.index
- index
), match
[0]);
439 index
= match
.index
+ count
;
441 processLexeme(value
.substr(index
));
442 for(var current
= top
; current
.parent
; current
= current
.parent
) { // close dangling modes
443 if (current
.className
) {
448 relevance
: relevance
,
454 if (e
.message
.indexOf('Illegal') != -1) {
466 Highlighting with language detection. Accepts a string with the code to
467 highlight. Returns an object with the following properties:
469 - language (detected language)
471 - value (an HTML string with highlighting markup)
472 - second_best (object with the same structure for second-best heuristically
473 detected language, may be absent)
476 function highlightAuto(text
, languageSubset
) {
477 languageSubset
= languageSubset || options
.languages || Object
.keys(languages
);
482 var second_best
= result
;
483 languageSubset
.forEach(function(name
) {
484 if (!getLanguage(name
)) {
487 var current
= highlight(name
, text
, false);
488 current
.language
= name
;
489 if (current
.relevance
> second_best
.relevance
) {
490 second_best
= current
;
492 if (current
.relevance
> result
.relevance
) {
493 second_best
= result
;
497 if (second_best
.language
) {
498 result
.second_best
= second_best
;
504 Post-processing of the highlighted markup:
506 - replace TABs with something more useful
507 - replace real line-breaks with '<br>' for non-pre containers
510 function fixMarkup(value
) {
511 if (options
.tabReplace
) {
512 value
= value
.replace(/^
((<[^
>]+>|
\t)+)/gm
, function(match
, p1
, offset
, s
) {
513 return p1
.replace(/\t/g
, options
.tabReplace
);
517 value
= value
.replace(/\n/g
, '<br>');
Applies highlighting to a DOM node containing code. Accepts the DOM node to
highlight; markup post-processing is controlled by the global options.
526 function highlightBlock(block
) {
527 var text
= options
.useBR ? block
.innerHTML
528 .replace(/\n/g
,'').replace(/<br
>|
<br
[^
>]*>/g
, '\n').replace(/<[^
>]*>/g
,'')
530 var language
= blockLanguage(block
);
531 if (language
== 'no-highlight')
533 var result
= language ?
highlight(language
, text
, true) : highlightAuto(text
);
534 var original
= nodeStream(block
);
535 if (original
.length
) {
536 var pre
= document
.createElementNS('http://www.w3.org/1999/xhtml', 'pre');
537 pre
.innerHTML
= result
.value
;
538 result
.value
= mergeStreams(original
, nodeStream(pre
), text
);
540 result
.value
= fixMarkup(result
.value
);
542 block
.innerHTML
= result
.value
;
543 block
.className
+= ' hljs ' + (!language
&& result
.language ||
'');
545 language
: result
.language
,
548 if (result
.second_best
) {
549 block
.second_best
= {
550 language
: result
.second_best
.language
,
551 re
: result
.second_best
.relevance
557 classPrefix
: 'hljs-',
564 Updates highlight.js global options with values passed in the form of an object
/*
Updates the global options with the values passed in `user_options`.

The `options` variable of the enclosing scope is replaced by the result of
inherit(options, user_options).

@param {Object} user_options  Option values to apply.
*/
function configure(user_options) {
  options = inherit(options, user_options);
}
571 Applies highlighting to all <pre><code>..</code></pre> blocks on a page.
573 function initHighlighting() {
574 if (initHighlighting
.called
)
576 initHighlighting
.called
= true;
578 var blocks
= document
.querySelectorAll('pre code');
579 Array
.prototype.forEach
.call(blocks
, highlightBlock
);
583 Attaches highlighting to the page load event.
/*
Attaches highlighting to the page load events: initHighlighting is
registered as a listener for both 'DOMContentLoaded' and 'load'.
*/
function initHighlightingOnLoad() {
  addEventListener('DOMContentLoaded', initHighlighting, false);
  addEventListener('load', initHighlighting, false);
}
593 function registerLanguage(name
, language
) {
594 var lang
= languages
[name] = language(this);
596 lang
.aliases
.forEach(function(alias
) {aliases
[alias] = name
;});
/*
Lists the names under which languages are stored in the `languages` table
of the enclosing scope.

@returns {string[]} The keys of `languages`.
*/
function listLanguages() {
  return Object.keys(languages);
}
/*
Looks a language up by its name or by one of its aliases, using the
`languages` and `aliases` tables of the enclosing scope.

Only the tables' own entries are considered, so a name such as
`constructor` or `toString` is not mistaken for a language because of a
property inherited from Object.prototype.

@param {string} name  Language name or alias.
@returns {Object|undefined} The language definition, or `undefined` when
  the name is unknown.
*/
function getLanguage(name) {
  var owns = Object.prototype.hasOwnProperty;
  var direct = owns.call(languages, name) ? languages[name] : undefined;
  if (direct) {
    return direct;
  }
  var target = owns.call(aliases, name) ? aliases[name] : undefined;
  return owns.call(languages, target) ? languages[target] : undefined;
}
/* Interface definition */

// Each function defined above is published under its own name as a property
// of the object being constructed.
this.highlight = highlight;
this.highlightAuto = highlightAuto;
this.fixMarkup = fixMarkup;
this.highlightBlock = highlightBlock;
this.configure = configure;
this.initHighlighting = initHighlighting;
this.initHighlightingOnLoad = initHighlightingOnLoad;
this.registerLanguage = registerLanguage;
this.listLanguages = listLanguages;
this.getLanguage = getLanguage;
this.inherit = inherit;
// Regular-expression sources (plain strings) published for reuse.
this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*';
this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*';
this.NUMBER_RE = '\\b\\d+(\\.\\d+)?';
this.C_NUMBER_RE = '(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float
this.BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b...
this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';
// Mode matching a backslash followed by any single character (line breaks
// included); it contributes nothing to relevance.
this.BACKSLASH_ESCAPE = {
  begin: '\\\\[\\s\\S]', relevance: 0
};
634 this.APOS_STRING_MODE
= {
636 begin
: '\'', end
: '\'',
638 contains
: [this.BACKSLASH_ESCAPE
]
640 this.QUOTE_STRING_MODE
= {
642 begin
: '"', end
: '"',
644 contains
: [this.BACKSLASH_ESCAPE
]
// Mode matching a fixed list of plain-English words; the comment modes
// below all contain it.
this.PHRASAL_WORDS_MODE = {
  begin: /\b(a|an|the|are|I|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such)\b/
};
// Comment running from `//` to the end of the line.
this.C_LINE_COMMENT_MODE = {
  className: 'comment',
  begin: '//', end: '$',
  contains: [this.PHRASAL_WORDS_MODE]
};
// Comment delimited by `/*` and `*/`.
this.C_BLOCK_COMMENT_MODE = {
  className: 'comment',
  begin: '/\\*', end: '\\*/',
  contains: [this.PHRASAL_WORDS_MODE]
};
// Comment running from `#` to the end of the line.
this.HASH_COMMENT_MODE = {
  className: 'comment',
  begin: '#', end: '$',
  contains: [this.PHRASAL_WORDS_MODE]
};
666 begin
: this.NUMBER_RE
,
669 this.C_NUMBER_MODE
= {
671 begin
: this.C_NUMBER_RE
,
674 this.BINARY_NUMBER_MODE
= {
676 begin
: this.BINARY_NUMBER_RE
,
679 this.CSS_NUMBER_MODE
= {
681 begin
: this.NUMBER_RE
+ '(' +
684 '|cm|mm|in|pt|pc|px' +
685 '|deg|grad|rad|turn' +
694 begin
: /\
//, end: /\/[gim]*/,
697 this.BACKSLASH_ESCAPE
,
699 begin
: /\
[/, end
: /\
]/,
701 contains
: [this.BACKSLASH_ESCAPE
]
707 begin
: this.IDENT_RE
,
710 this.UNDERSCORE_TITLE_MODE
= {
712 begin
: this.UNDERSCORE_IDENT_RE
,