Skip to content

Commit 1d75072

Browse files
committed
Optimize library size
1 parent aef7d96 commit 1d75072

File tree

13 files changed

+41285
-124002
lines changed

13 files changed

+41285
-124002
lines changed

‎docs/benchmark.md

+11-11
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@
1313

1414
Here is the list of libraries in this benchmark
1515

16-
| Library | Script | Properly Identified | Improperly identified | Not identified | Avg Execution Time | Disk Size |
17-
| -------------- | --------------------------- | ------------------- | --------------------- | -------------- | ------------------ | --------- |
18-
| **TinyLD** | `yarn bench:tinyld` | 98.274% | 1.6277% | 0.0983% | 0.1002ms. | 930KB |
19-
| **TinyLD Web** | `yarn bench:tinyld-light` | 97.8901% | 1.9765% | 0.1334% | 0.0868ms. | **110KB** |
20-
| **langdetect** | `yarn bench:langdetect` | 95.6623% | 4.3377% | 0% | 0.584ms. | 1.8MB |
21-
| node-cld | `yarn bench:cld` | 92.2917% | 1.6375% | 6.0708% | 0.0687ms. | > 10MB |
22-
| node-lingua | `yarn bench:lingua` | 87.1399% | 0.069% | 12.7912% | 1.1357ms. | ~100MB |
23-
| franc | `yarn bench:franc` | 74.0083% | 25.9917% | 0% | 0.1947ms. | 267KB |
24-
| franc-min | `yarn bench:franc-min` | 70.1002% | 23.4164% | 6.4834% | 0.0914ms. | **119KB** |
25-
| franc-all | `yarn bench:franc-all` | 66.3846% | 33.6154% | 0% | 0.576ms. | 509KB |
26-
| languagedetect | `yarn bench:languagedetect` | 65.9069% | 11.3828% | 22.7103% | 0.2643ms. | **240KB** |
16+
| Library | Script | Languages | Properly Identified | Improperly identified | Not identified | Avg Execution Time | Disk Size |
17+
| -------------- | --------------------------- | -------- | ------------------- | --------------------- | -------------- | ------------------ | --------- |
18+
| **TinyLD** | `yarn bench:tinyld` | 64 | 98.274% | 1.6277% | 0.0983% | 0.0871ms. | 580KB |
19+
| **TinyLD Web** | `yarn bench:tinyld-light` | 24 | 97.8901% | 1.9765% | 0.1334% | 0.0637ms. | **68KB** |
20+
| **langdetect** | `yarn bench:langdetect` | 53 | 95.6623% | 4.3377% | 0% | 0.584ms. | 1.8MB |
21+
| node-cld | `yarn bench:cld` | 160 | 92.2917% | 1.6375% | 6.0708% | 0.0687ms. | > 10MB |
22+
| node-lingua | `yarn bench:lingua` | 75 | 87.1399% | 0.069% | 12.7912% | 1.1357ms. | ~100MB |
23+
| franc | `yarn bench:franc` | 187 | 74.0083% | 25.9917% | 0% | 0.1947ms. | 267KB |
24+
| franc-min | `yarn bench:franc-min` | 82 | 70.1002% | 23.4164% | 6.4834% | 0.0914ms. | 119KB |
25+
| franc-all | `yarn bench:franc-all` | 403 | 66.3846% | 33.6154% | 0% | 0.576ms. | 509KB |
26+
| languagedetect | `yarn bench:languagedetect` | 52 | 65.9069% | 11.3828% | 22.7103% | 0.2643ms. | 240KB |
2727

2828
---
2929

‎docs/exec_time.svg

+1-1
Loading

‎package.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@
5252
"bench:languagedetect": "ts-node src/benchmark/languagedetect.ts",
5353
"bench:lingua": "ts-node src/benchmark/lingua.ts",
5454
"build": "run-p build:*",
55-
"build:normal-node": "esbuild src/index.ts --bundle --minify --platform=node --outfile=dist/tinyld.cjs.js",
56-
"build:normal-web": "esbuild src/index.ts --bundle --minify --platform=browser --format=esm --outfile=dist/tinyld.esm.js",
57-
"build:light-node": "esbuild src/index_light.ts --bundle --minify --platform=node --outfile=dist/tinyld.light.cjs.js",
58-
"build:light-web": "esbuild src/index_light.ts --bundle --minify --platform=browser --format=esm --outfile=dist/tinyld.light.esm.js",
55+
"build:normal-node": "esbuild src/index.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"normal\\\"} --bundle --charset=utf8 --minify --platform=node --outfile=dist/tinyld.cjs.js",
56+
"build:normal-web": "esbuild src/index.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"normal\\\"} --bundle --charset=utf8 --minify --platform=browser --format=esm --outfile=dist/tinyld.esm.js",
57+
"build:light-node": "esbuild src/index_light.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"light\\\"} --bundle --charset=utf8 --minify --platform=node --outfile=dist/tinyld.light.cjs.js",
58+
"build:light-web": "esbuild src/index_light.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"light\\\"} --bundle --charset=utf8 --minify --platform=browser --format=esm --outfile=dist/tinyld.light.esm.js",
5959
"build:post": "yarn gen:svg && yarn gen:mkd && yarn lint",
6060
"gen:mkd": "node ./utils/mkdown.js",
6161
"gen:svg": "node ./utils/index.js",

‎src/benchmark/bench.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import fs from 'fs'
22
import readline from 'readline'
3-
import { langName } from '..'
4-
import { approximate, getCoef, langs, toISO2 } from '../core'
3+
import { approximate, getCoef, langs, langName, toISO2 } from '../core'
54

65
type DetectMethod = (val: string) => Promise<string> | string
76

‎src/core.ts

+7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ export interface ILangProfiles {
33
multiples: { [gram: string]: { [country: string]: number } }
44
}
55

6+
/**
 * Compressed layout of a language profile, as imported from the profile JSON
 * files and expanded into `ILangProfiles` by `src/index.ts` and
 * `src/index_light.ts`.
 */
export interface ILangCompressedProfiles {
7+
// gram -> language id; readers decode it with parseInt(value.toString(), 36) and look the result up in langFromId
uniques: { [id: string]: string | number }
8+
// gram -> packed string of fixed-width chunks; each chunk is a base-36 language id followed by a base-36 value
// (chunk width differs between the normal and light readers)
multiples: { [gram: string]: string }
9+
}
10+
611
// different config profiles
712
const config = {
813
light: {
@@ -154,6 +159,8 @@ export const langs = new Set(
154159
.map((x) => x[0])
155160
)
156161
export const supportedLanguages = [...langs.values()]
162+
// Lookup from each entry of supportedLanguages to a 1-based numeric id (its array index + 1).
export const langToId = Object.fromEntries(supportedLanguages.map((x, i) => [x, i + 1]))
163+
// Inverse lookup: 1-based id (array index + 1) back to the supportedLanguages entry.
// Ids depend on the order of supportedLanguages.
export const langFromId = Object.fromEntries(supportedLanguages.map((x, i) => [i + 1, x]))
157164

158165
export function langRegion(iso3: string): string {
159166
if (iso3 in langMap) return langMap[iso3].region

‎src/index.ts

+20-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,27 @@
11
import { isString } from './clean'
2-
import { DetectOption, ILangProfiles, parseDetectOption } from './core'
2+
import { DetectOption, ILangCompressedProfiles, ILangProfiles, langFromId, parseDetectOption } from './core'
33
import data from './profiles/normal.json'
44
import { detectAllStats } from './tokenizer'
55

6-
const profiles = data as ILangProfiles
6+
// The imported JSON uses the compressed layout; it is expanded below into the
// ILangProfiles shape.
const compressed = data as ILangCompressedProfiles
7+
const profiles: ILangProfiles = {
8+
// uniques: each value is a base-36 language id that is mapped back to a language through langFromId.
uniques: Object.fromEntries(
9+
Object.entries(compressed.uniques).map((x) => {
10+
// toString() handles values that the JSON stores as numbers; their decimal digits are still read as base 36.
return [x[0], langFromId[parseInt(x[1].toString(), 36)]]
11+
})
12+
),
13+
// multiples: each value is a packed string that is unpacked into a { language: value } object.
multiples: Object.fromEntries(
14+
Object.entries(compressed.multiples).map((x) => {
15+
const entry = Object.fromEntries(
16+
// Cut the packed string into chunks of up to 4 characters, one chunk per language.
x[1].match(/(.{1,4})/g)?.map((y) => {
17+
// Cut the chunk into pieces of up to 2 characters: first the language id (named `country` here), then the value.
// NOTE(review): a chunk shorter than 3 characters leaves `val` undefined, and parseInt(undefined, 36)
// parses the text "undefined" as base-36 digits instead of returning NaN — presumably the generator
// always emits full 4-character chunks; confirm.
const [country, val] = y.match(/(.{1,2})/g) as string[]
18+
return [langFromId[parseInt(country, 36)], parseInt(val, 36)]
19+
// match() returns null when the packed string is empty; fall back to no entries.
}) || []
20+
)
21+
return [x[0], entry]
22+
})
23+
)
24+
}
725
const uniqueKeys = new Set(Object.keys(data.uniques))
826

927
export function detect(text: string, opts?: Partial<DetectOption>): string {

‎src/index_light.ts

+21-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,28 @@
11
import { isString } from './clean'
2-
import { DetectOption, ILangProfiles, parseDetectOption } from './core'
2+
import { DetectOption, ILangProfiles, parseDetectOption, ILangCompressedProfiles, langFromId } from './core'
33
import data from './profiles/light.json'
44
import { detectAllStats } from './tokenizer'
55

6-
const profiles = data as ILangProfiles
6+
// The imported JSON uses the compressed layout; it is expanded below into the
// ILangProfiles shape.
const compressed = data as ILangCompressedProfiles
7+
const profiles: ILangProfiles = {
8+
// uniques: each value is a base-36 language id that is mapped back to a language through langFromId.
uniques: Object.fromEntries(
9+
Object.entries(compressed.uniques).map((x) => {
10+
// toString() handles values that the JSON stores as numbers; their decimal digits are still read as base 36.
return [x[0], langFromId[parseInt(x[1].toString(), 36)]]
11+
})
12+
),
13+
// multiples: each value is a packed string that is unpacked into a { language: value } object.
multiples: Object.fromEntries(
14+
Object.entries(compressed.multiples).map((x) => {
15+
const entry = Object.fromEntries(
16+
// Cut the packed string into chunks of up to 3 characters, one chunk per language.
x[1].match(/(.{1,3})/g)?.map((y) => {
17+
// The first character is the language id (named `country` here); a single base-36 digit covers ids up to 35.
// NOTE(review): assumes the light profile never needs an id above 35 — confirm against the generator.
const country = y.slice(0, 1)
18+
// The remaining 1-2 characters are the base-36 value.
// NOTE(review): a 1-character chunk gives an empty string here and parseInt('', 36) is NaN — confirm this cannot occur.
const val = y.slice(1)
19+
return [langFromId[parseInt(country, 36)], parseInt(val, 36)]
20+
// match() returns null when the packed string is empty; fall back to no entries.
}) || []
21+
)
22+
return [x[0], entry]
23+
})
24+
)
25+
}
726
const uniqueKeys = new Set(Object.keys(data.uniques))
827

928
export function detect(text: string, opts?: Partial<DetectOption>): string {

0 commit comments

Comments
 (0)