aboutsummaryrefslogtreecommitdiff
path: root/src/modules
diff options
context:
space:
mode:
author Matt Mayer <[email protected]> 2022-12-04 03:15:36 +0700
committer GitHub <[email protected]> 2022-12-03 21:15:36 +0100
commit 4ed45fa33f80c59625a285d06abe31ce2f524357 (patch)
tree a01624a3f23004160fcbe63533358ae8819d67ca /src/modules
parent 5e51335e19426820874c75678ee5a7ebd332a911 (diff)
download faker-4ed45fa33f80c59625a285d06abe31ce2f524357.tar.xz
faker-4ed45fa33f80c59625a285d06abe31ce2f524357.zip
fix(internet): userName, email and slugify return only ascii (#1554)
Diffstat (limited to 'src/modules')
-rw-r--r-- src/modules/helpers/index.ts 6
-rw-r--r-- src/modules/internet/char-mappings.ts 285
-rw-r--r-- src/modules/internet/index.ts 81
3 files changed, 364 insertions, 8 deletions
diff --git a/src/modules/helpers/index.ts b/src/modules/helpers/index.ts
index 6caa8eff..7f2c5898 100644
--- a/src/modules/helpers/index.ts
+++ b/src/modules/helpers/index.ts
@@ -33,8 +33,10 @@ export class HelpersModule {
*/
slugify(string: string = ''): string {
return string
- .replace(/ /g, '-')
- .replace(/[^\一-龠\ぁ-ゔ\ァ-ヴー\w\.\-]+/g, '');
+ .normalize('NFKD') // for example, è decomposes to e + ̀ (U+0300, combining grave accent)
+ .replace(/[\u0300-\u036f]/g, '') // removes combining marks
+ .replace(/ /g, '-') // replaces spaces with hyphens
+ .replace(/[^\w\.\-]+/g, ''); // removes all non-word characters except for dots and hyphens
}
/**
diff --git a/src/modules/internet/char-mappings.ts b/src/modules/internet/char-mappings.ts
new file mode 100644
index 00000000..6b784379
--- /dev/null
+++ b/src/modules/internet/char-mappings.ts
@@ -0,0 +1,285 @@
+// Selectively sourced from https://github.com/sindresorhus/transliterate/blob/08bbfd3a13ac393d945a430ed5ec62f044a08d70/replacements.js (under MIT license)
+const cyrillicMapping: { [key: string]: string } = Object.fromEntries([
+ ['А', 'A'],
+ ['а', 'a'],
+ ['Б', 'B'],
+ ['б', 'b'],
+ ['В', 'V'],
+ ['в', 'v'],
+ ['Г', 'G'],
+ ['г', 'g'],
+ ['Д', 'D'],
+ ['д', 'd'],
+ ['ъе', 'ye'],
+ ['Ъе', 'Ye'],
+ ['ъЕ', 'yE'],
+ ['ЪЕ', 'YE'],
+ ['Е', 'E'],
+ ['е', 'e'],
+ ['Ё', 'Yo'],
+ ['ё', 'yo'],
+ ['Ж', 'Zh'],
+ ['ж', 'zh'],
+ ['З', 'Z'],
+ ['з', 'z'],
+ ['И', 'I'],
+ ['и', 'i'],
+ ['ый', 'iy'],
+ ['Ый', 'Iy'],
+ ['ЫЙ', 'IY'],
+ ['ыЙ', 'iY'],
+ ['Й', 'Y'],
+ ['й', 'y'],
+ ['К', 'K'],
+ ['к', 'k'],
+ ['Л', 'L'],
+ ['л', 'l'],
+ ['М', 'M'],
+ ['м', 'm'],
+ ['Н', 'N'],
+ ['н', 'n'],
+ ['О', 'O'],
+ ['о', 'o'],
+ ['П', 'P'],
+ ['п', 'p'],
+ ['Р', 'R'],
+ ['р', 'r'],
+ ['С', 'S'],
+ ['с', 's'],
+ ['Т', 'T'],
+ ['т', 't'],
+ ['У', 'U'],
+ ['у', 'u'],
+ ['Ф', 'F'],
+ ['ф', 'f'],
+ ['Х', 'Kh'],
+ ['х', 'kh'],
+ ['Ц', 'Ts'],
+ ['ц', 'ts'],
+ ['Ч', 'Ch'],
+ ['ч', 'ch'],
+ ['Ш', 'Sh'],
+ ['ш', 'sh'],
+ ['Щ', 'Sch'],
+ ['щ', 'sch'],
+ ['Ъ', ''],
+ ['ъ', ''],
+ ['Ы', 'Y'],
+ ['ы', 'y'],
+ ['Ь', ''],
+ ['ь', ''],
+ ['Э', 'E'],
+ ['э', 'e'],
+ ['Ю', 'Yu'],
+ ['ю', 'yu'],
+ ['Я', 'Ya'],
+ ['я', 'ya'],
+]);
+const greekMapping: { [key: string]: string } = Object.fromEntries([
+ ['α', 'a'],
+ ['β', 'v'],
+ ['γ', 'g'],
+ ['δ', 'd'],
+ ['ε', 'e'],
+ ['ζ', 'z'],
+ ['η', 'i'],
+ ['θ', 'th'],
+ ['ι', 'i'],
+ ['κ', 'k'],
+ ['λ', 'l'],
+ ['μ', 'm'],
+ ['ν', 'n'],
+ ['ξ', 'ks'],
+ ['ο', 'o'],
+ ['π', 'p'],
+ ['ρ', 'r'],
+ ['σ', 's'],
+ ['τ', 't'],
+ ['υ', 'y'],
+ ['φ', 'f'],
+ ['χ', 'x'],
+ ['ψ', 'ps'],
+ ['ω', 'o'],
+ ['ά', 'a'],
+ ['έ', 'e'],
+ ['ί', 'i'],
+ ['ό', 'o'],
+ ['ύ', 'y'],
+ ['ή', 'i'],
+ ['ώ', 'o'],
+ ['ς', 's'],
+ ['ϊ', 'i'],
+ ['ΰ', 'y'],
+ ['ϋ', 'y'],
+ ['ΐ', 'i'],
+ ['Α', 'A'],
+ ['Β', 'B'],
+ ['Γ', 'G'],
+ ['Δ', 'D'],
+ ['Ε', 'E'],
+ ['Ζ', 'Z'],
+ ['Η', 'I'],
+ ['Θ', 'TH'],
+ ['Ι', 'I'],
+ ['Κ', 'K'],
+ ['Λ', 'L'],
+ ['Μ', 'M'],
+ ['Ν', 'N'],
+ ['Ξ', 'KS'],
+ ['Ο', 'O'],
+ ['Π', 'P'],
+ ['Ρ', 'R'],
+ ['Σ', 'S'],
+ ['Τ', 'T'],
+ ['Υ', 'Y'],
+ ['Φ', 'F'],
+ ['Χ', 'X'],
+ ['Ψ', 'PS'],
+ ['Ω', 'O'],
+ ['Ά', 'A'],
+ ['Έ', 'E'],
+ ['Ί', 'I'],
+ ['Ό', 'O'],
+ ['Ύ', 'Y'],
+ ['Ή', 'I'],
+ ['Ώ', 'O'],
+ ['Ϊ', 'I'],
+ ['Ϋ', 'Y'],
+]);
+const arabicMapping: { [key: string]: string } = Object.fromEntries([
+ ['ء', 'e'],
+ ['آ', 'a'],
+ ['أ', 'a'],
+ ['ؤ', 'w'],
+ ['إ', 'i'],
+ ['ئ', 'y'],
+ ['ا', 'a'],
+ ['ب', 'b'],
+ ['ة', 't'],
+ ['ت', 't'],
+ ['ث', 'th'],
+ ['ج', 'j'],
+ ['ح', 'h'],
+ ['خ', 'kh'],
+ ['د', 'd'],
+ ['ذ', 'dh'],
+ ['ر', 'r'],
+ ['ز', 'z'],
+ ['س', 's'],
+ ['ش', 'sh'],
+ ['ص', 's'],
+ ['ض', 'd'],
+ ['ط', 't'],
+ ['ظ', 'z'],
+ ['ع', 'e'],
+ ['غ', 'gh'],
+ ['ـ', '_'],
+ ['ف', 'f'],
+ ['ق', 'q'],
+ ['ك', 'k'],
+ ['ل', 'l'],
+ ['م', 'm'],
+ ['ن', 'n'],
+ ['ه', 'h'],
+ ['و', 'w'],
+ ['ى', 'a'],
+ ['ي', 'y'],
+ ['َ‎', 'a'],
+ ['ُ', 'u'],
+ ['ِ‎', 'i'],
+]);
+const armenianMapping: { [key: string]: string } = Object.fromEntries([
+ ['ա', 'a'],
+ ['Ա', 'A'],
+ ['բ', 'b'],
+ ['Բ', 'B'],
+ ['գ', 'g'],
+ ['Գ', 'G'],
+ ['դ', 'd'],
+ ['Դ', 'D'],
+ ['ե', 'ye'],
+ ['Ե', 'Ye'],
+ ['զ', 'z'],
+ ['Զ', 'Z'],
+ ['է', 'e'],
+ ['Է', 'E'],
+ ['ը', 'y'],
+ ['Ը', 'Y'],
+ ['թ', 't'],
+ ['Թ', 'T'],
+ ['ժ', 'zh'],
+ ['Ժ', 'Zh'],
+ ['ի', 'i'],
+ ['Ի', 'I'],
+ ['լ', 'l'],
+ ['Լ', 'L'],
+ ['խ', 'kh'],
+ ['Խ', 'Kh'],
+ ['ծ', 'ts'],
+ ['Ծ', 'Ts'],
+ ['կ', 'k'],
+ ['Կ', 'K'],
+ ['հ', 'h'],
+ ['Հ', 'H'],
+ ['ձ', 'dz'],
+ ['Ձ', 'Dz'],
+ ['ղ', 'gh'],
+ ['Ղ', 'Gh'],
+ ['ճ', 'tch'],
+ ['Ճ', 'Tch'],
+ ['մ', 'm'],
+ ['Մ', 'M'],
+ ['յ', 'y'],
+ ['Յ', 'Y'],
+ ['ն', 'n'],
+ ['Ն', 'N'],
+ ['շ', 'sh'],
+ ['Շ', 'Sh'],
+ ['ո', 'vo'],
+ ['Ո', 'Vo'],
+ ['չ', 'ch'],
+ ['Չ', 'Ch'],
+ ['պ', 'p'],
+ ['Պ', 'P'],
+ ['ջ', 'j'],
+ ['Ջ', 'J'],
+ ['ռ', 'r'],
+ ['Ռ', 'R'],
+ ['ս', 's'],
+ ['Ս', 'S'],
+ ['վ', 'v'],
+ ['Վ', 'V'],
+ ['տ', 't'],
+ ['Տ', 'T'],
+ ['ր', 'r'],
+ ['Ր', 'R'],
+ ['ց', 'c'],
+ ['Ց', 'C'],
+ ['ու', 'u'],
+ ['ՈՒ', 'U'],
+ ['Ու', 'U'],
+ ['փ', 'p'],
+ ['Փ', 'P'],
+ ['ք', 'q'],
+ ['Ք', 'Q'],
+ ['օ', 'o'],
+ ['Օ', 'O'],
+ ['ֆ', 'f'],
+ ['Ֆ', 'F'],
+ ['և', 'yev'],
+]);
+const farsiMapping: { [key: string]: string } = Object.fromEntries([
+ ['چ', 'ch'],
+ ['ک', 'k'],
+ ['گ', 'g'],
+ ['پ', 'p'],
+ ['ژ', 'zh'],
+ ['ی', 'y'],
+]);
+export const charMapping: { [key: string]: string } = {
+ ...cyrillicMapping,
+ ...greekMapping,
+ ...arabicMapping,
+ ...farsiMapping,
+ ...armenianMapping,
+};
diff --git a/src/modules/internet/index.ts b/src/modules/internet/index.ts
index a32045d9..dc3eaa62 100644
--- a/src/modules/internet/index.ts
+++ b/src/modules/internet/index.ts
@@ -1,4 +1,5 @@
import type { Faker } from '../..';
+import { charMapping } from './char-mappings';
import * as random_ua from './user-agent';
export type EmojiType =
@@ -81,10 +82,7 @@ export class InternetModule {
this.faker.definitions.internet.free_email
);
- let localPart: string = this.faker.helpers.slugify(
- this.userName(firstName, lastName)
- );
-
+ let localPart: string = this.userName(firstName, lastName);
if (options?.allowSpecialCharacters) {
const usernameChars: string[] = '._-'.split('');
const specialChars: string[] = ".!#$%&'*+-/=?^_`{|}~".split('');
@@ -125,14 +123,20 @@ export class InternetModule {
}
/**
- * Generates a username using the given person's name as base.
+ * Generates a username using the given person's name as base. The resulting username may use neither, one or both of the names provided. This will always return a plain ASCII string. Some basic stripping of accents and transliteration of characters will be done.
*
* @param firstName The optional first name to use. If not specified, a random one will be chosen.
* @param lastName The optional last name to use. If not specified, a random one will be chosen.
*
+ * @see faker.internet.displayName()
+ *
* @example
* faker.internet.userName() // 'Nettie_Zboncak40'
- * faker.internet.userName('Jeanne', 'Doe') // 'Jeanne98'
+ * faker.internet.userName('Jeanne', 'Doe') // 'Jeanne98' - note surname is not used
+ * faker.internet.userName('John', 'Doe') // 'John.Doe'
+ * faker.internet.userName('Hélene', 'Müller') // 'Helene_Muller11'
+ * faker.internet.userName('Фёдор', 'Достоевский') // 'Fedor.Dostoevskii50'
+ * faker.internet.userName('大羽', '陳') // 'hlzp8d.tpv45' - note neither name is used
*
* @since 2.0.1
*/
@@ -155,6 +159,71 @@ export class InternetModule {
])}${lastName}${this.faker.number.int(99)}`;
break;
}
+
+ // There may still be non-ascii characters in the result.
+ // First remove simple accents etc
+ result = result
+ .normalize('NFKD') // for example, è decomposes to e + ̀ (U+0300, combining grave accent)
+ .replace(/[\u0300-\u036f]/g, ''); // removes combining marks
+
+ result = result
+ .split('')
+ .map((char) => {
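+ // If we have a mapping for this character (for Cyrillic, Greek etc.), use it. NOTE(review): this truthiness check skips entries mapped to '' (Ъ, ъ, Ь, ь), which then fall through to the base-36 fallback below; multi-character keys (e.g. 'ый') can never match a single split('') unit.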
+ if (charMapping[char]) {
+ return charMapping[char];
+ }
+ if (char.charCodeAt(0) < 0x80) {
+ // Keep ASCII characters
+ return char;
+ }
+ // Final fallback return the Unicode char code value for Chinese, Japanese, Korean etc, base-36 encoded
+ return char.charCodeAt(0).toString(36);
+ })
+ .join('');
+ result = result.toString().replace(/'/g, '');
+ result = result.replace(/ /g, '');
+
+ return result;
+ }
+
+ /**
+ * Generates a display name using the given person's name as base. The resulting display name may use one or both of the provided names. If the input names include Unicode characters, the resulting display name will contain Unicode characters. It will not contain spaces.
+ *
+ * @param firstName The optional first name to use. If not specified, a random one will be chosen.
+ * @param lastName The optional last name to use. If not specified, a random one will be chosen.
+ *
+ * @see faker.internet.userName()
+ *
+ * @example
+ * faker.internet.displayName() // 'Nettie_Zboncak40'
+ * faker.internet.displayName('Jeanne', 'Doe') // 'Jeanne98' - note surname is not used
+ * faker.internet.displayName('John', 'Doe') // 'John.Doe'
+ * faker.internet.displayName('Hélene', 'Müller') // 'Hélene_Müller11'
+ * faker.internet.displayName('Фёдор', 'Достоевский') // 'Фёдор.Достоевский50'
+ * faker.internet.displayName('大羽', '陳') // '大羽.陳'
+ *
+ * @since 8.0.0
+ */
+ displayName(firstName?: string, lastName?: string): string {
+ let result: string;
+ firstName = firstName || this.faker.person.firstName();
+ lastName = lastName || this.faker.person.lastName();
+ switch (this.faker.number.int(2)) {
+ case 0:
+ result = `${firstName}${this.faker.number.int(99)}`;
+ break;
+ case 1:
+ result =
+ firstName + this.faker.helpers.arrayElement(['.', '_']) + lastName;
+ break;
+ case 2:
+ result = `${firstName}${this.faker.helpers.arrayElement([
+ '.',
+ '_',
+ ])}${lastName}${this.faker.number.int(99)}`;
+ break;
+ }
result = result.toString().replace(/'/g, '');
result = result.replace(/ /g, '');
return result;