Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 36 additions & 36 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,20 @@ npm install --save node-html-parser

## Performance

-- 2022-08-10
-- 2026-06-20

```shell
html-parser :24.1595 ms/file ± 18.7667
htmljs-parser :4.72064 ms/file ± 5.67689
html-dom-parser :2.18055 ms/file ± 2.96136
html5parser :1.69639 ms/file ± 2.17111
cheerio :12.2122 ms/file ± 8.10916
parse5 :6.50626 ms/file ± 4.02352
htmlparser2 :2.38179 ms/file ± 3.42389
htmlparser :17.4820 ms/file ± 128.041
high5 :3.95188 ms/file ± 2.52313
node-html-parser:2.04288 ms/file ± 1.25203
node-html-parser (last release):2.00527 ms/file ± 1.21317
html-parser :12.5662 ms/file ± 10.0834
htmljs-parser :0.233045 ms/file ± 0.525111
html-dom-parser :1.07375 ms/file ± 0.811077
html5parser :0.824501 ms/file ± 0.540651
cheerio :3.27444 ms/file ± 2.06027
parse5 :2.43857 ms/file ± 1.56153
htmlparser2 :0.712490 ms/file ± 0.364630
htmlparser :10.5275 ms/file ± 82.6013
high5 :1.64003 ms/file ± 0.993116
node-html-parser:0.972389 ms/file ± 0.570578
node-html-parser (last release):0.961381 ms/file ± 0.553054
```

Tested with [htmlparser-benchmark](https://github.com/AndreasMadsen/htmlparser-benchmark).
Expand Down Expand Up @@ -153,36 +153,36 @@ class HTMLElement{
[number, number] range
}
class Node{
<<abstract>>
string toString()
Node clone()
this remove()
number nodeType
string innerText
string textContent
<<abstract>>
string toString()
Node clone()
this remove()
number nodeType
string innerText
string textContent
}
class ClassList{
add(string c)
replace(string c1, string c2)
remove(string c)
toggle(string c)
boolean contains(string c)
number length
string[] value
string toString()
add(string c)
replace(string c1, string c2)
remove(string c)
toggle(string c)
boolean contains(string c)
number length
string[] value
string toString()
}
class CommentNode{
CommentNode clone()
string toString()
CommentNode clone()
string toString()
}
class TextNode{
TextNode clone()
string toString()
string rawText
string trimmedRawText
string trimmedText
string text
boolean isWhitespace
TextNode clone()
string toString()
string rawText
string trimmedRawText
string trimmedText
string text
boolean isWhitespace
}
Node --|> HTMLElement
Node --|> CommentNode
Expand Down
16 changes: 7 additions & 9 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,35 +51,33 @@
},
"dependencies": {
"css-select": "^5.1.0",
"he": "1.2.0"
"entities": "^8.0.0"
},
"devDependencies": {
"@types/entities": "latest",
"@types/he": "latest",
"@types/node": "latest",
"@typescript-eslint/eslint-plugin": "latest",
"@typescript-eslint/eslint-plugin-tslint": "latest",
"@typescript-eslint/parser": "latest",
"blanket": "latest",
"boolbase": "^1.0.0",
"cheerio": "^1.0.0-rc.12",
"cheerio": "^1.2.0",
"cross-env": "^7.0.3",
"eslint": "^8.23.1",
"eslint-config-prettier": "latest",
"eslint-plugin-import": "latest",
"high5": "^1.0.0",
"html-dom-parser": "^3.1.2",
"html-dom-parser": "^8.0.0",
"html-parser": "^0.11.0",
"html5parser": "^2.0.2",
"htmljs-parser": "^5.1.4",
"html5parser": "^3.0.0",
"htmljs-parser": "^5.10.2",
"htmlparser": "^1.7.7",
"htmlparser-benchmark": "^1.1.3",
"htmlparser2": "^8.0.1",
"htmlparser2": "^12.0.0",
"mocha": "latest",
"mocha-each": "^2.0.1",
"neutron-html5parser": "^0.2.0",
"np": "latest",
"parse5": "^7.1.1",
"parse5": "^8.0.1",
"rimraf": "^3.0.2",
"saxes": "^6.0.0",
"should": "latest",
Expand Down
5 changes: 2 additions & 3 deletions src/nodes/html.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { is, selectAll, selectOne } from 'css-select';
import he from 'he';
import entities from 'entities';
import arr_back from '../back';
import Matcher from '../matcher';
import VoidTag from '../void-tag';
Expand Down Expand Up @@ -33,8 +33,7 @@ type IRawTagName =
| 'h6';

/**
 * Decodes HTML entities in `val` and returns the decoded string.
 *
 * NOTE(review): this excerpt is a rendered diff without +/- markers, so the body
 * appears to contain both the previous implementation (the `he.decode` line with
 * its "clone string" comment) and its replacement (the `entities.decodeHTML`
 * line). Read literally, only the first `return` can execute and the second is
 * unreachable — confirm against the real file which line survives.
 *
 * NOTE(review): `entities` is brought in through a default import in this file,
 * whereas the node.ts and text.ts hunks use the named `decodeHTML` import —
 * verify that the package really provides a default export under this build setup.
 */
function decode(val: string) {
// clone string
// The decoded value is round-tripped through JSON; per the comment above this is meant to yield a copy.
return JSON.parse(JSON.stringify(he.decode(val))) as string;
// Presumably the replacement line from the diff: decodes directly, with no JSON round-trip.
return entities.decodeHTML(val);
}

export interface KeyAttributes {
Expand Down
4 changes: 2 additions & 2 deletions src/nodes/node.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { decode, encode } from 'he';
import { decodeHTML, encode } from 'entities';
import NodeType from './type';
import HTMLElement from './html';

Expand Down Expand Up @@ -43,7 +43,7 @@ export default abstract class Node {
return this.rawText;
}
/**
 * Returns `rawText` with HTML entities decoded.
 *
 * NOTE(review): this hunk is a rendered diff without +/- markers; the `decode`
 * line looks like the removed `he` call and the `decodeHTML` line its
 * replacement from `entities`. Read literally, only the first `return` runs —
 * confirm against the real file.
 */
public get textContent() {
// Presumably the pre-change line (`decode` was imported from 'he').
return decode(this.rawText);
// Presumably the post-change line (`decodeHTML` is imported from 'entities').
return decodeHTML(this.rawText);
}
public set textContent(val: string) {
this.rawText = encode(val);
Expand Down
4 changes: 2 additions & 2 deletions src/nodes/text.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { decode } from 'he';
import { decodeHTML } from 'entities';
import HTMLElement from './html';
import Node from './node';
import NodeType from './type';
Expand Down Expand Up @@ -63,7 +63,7 @@ export default class TextNode extends Node {
* @return {string} text content
*/
public get text() {
// NOTE(review): this hunk is a rendered diff without +/- markers, so two
// returns appear; read literally only the first one executes — confirm
// against the real file which line is current.
// Presumably the pre-change line (`decode` was imported from 'he').
return decode(this.rawText);
// Presumably the post-change line (`decodeHTML` is imported from 'entities').
return decodeHTML(this.rawText);
}

/**
Expand Down
Loading
Loading