ICU 52.1  52.1
uspoof.h
Go to the documentation of this file.
1 /*
2 ***************************************************************************
3 * Copyright (C) 2008-2013, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 ***************************************************************************
6 * file name: uspoof.h
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2008Feb13
12 * created by: Andy Heninger
13 *
14 * Unicode Spoof Detection
15 */
16 
17 #ifndef USPOOF_H
18 #define USPOOF_H
19 
20 #include "unicode/utypes.h"
21 #include "unicode/uset.h"
22 #include "unicode/parseerr.h"
23 #include "unicode/localpointer.h"
24 
25 #if !UCONFIG_NO_NORMALIZATION
26 
27 
28 #if U_SHOW_CPLUSPLUS_API
29 #include "unicode/unistr.h"
30 #include "unicode/uniset.h"
31 #endif
32 
33 
144 struct USpoofChecker;
145 typedef struct USpoofChecker USpoofChecker;
154 typedef enum USpoofChecks {
161 
171 
182 
190 
205 
206 #ifndef U_HIDE_DEPRECATED_API
207 
213 #endif /* U_HIDE_DEPRECATED_API */
214 
222 
228 
229 #ifndef U_HIDE_DRAFT_API
230 
237 #endif /* U_HIDE_DRAFT_API */
238 
245 
246 #ifndef U_HIDE_DRAFT_API
247 
259  USPOOF_AUX_INFO = 0x40000000
260 #endif /* U_HIDE_DRAFT_API */
261 
262  } USpoofChecks;
263 
264 
265 #ifndef U_HIDE_DRAFT_API
266 
271  typedef enum URestrictionLevel {
277  USPOOF_ASCII = 0x10000000,
303  USPOOF_UNRESTRICTIVE = 0x50000000
305 #endif /* U_HIDE_DRAFT_API */
306 
317 U_STABLE USpoofChecker * U_EXPORT2
318 uspoof_open(UErrorCode *status);
319 
320 
342 U_STABLE USpoofChecker * U_EXPORT2
343 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
344  UErrorCode *pErrorCode);
345 
377 U_STABLE USpoofChecker * U_EXPORT2
378 uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
379  const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
380  int32_t *errType, UParseError *pe, UErrorCode *status);
381 
382 
388 U_STABLE void U_EXPORT2
390 
391 #if U_SHOW_CPLUSPLUS_API
392 
394 
405 
407 
408 #endif
409 
419 U_STABLE USpoofChecker * U_EXPORT2
420 uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
421 
422 
435 U_STABLE void U_EXPORT2
436 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
437 
449 U_STABLE int32_t U_EXPORT2
450 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
451 
452 #ifndef U_HIDE_DRAFT_API
453 
461 U_DRAFT void U_EXPORT2
463 
464 
472 U_DRAFT URestrictionLevel U_EXPORT2
474 #endif /* U_HIDE_DRAFT_API */
475 
518 U_STABLE void U_EXPORT2
519 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
520 
542 U_STABLE const char * U_EXPORT2
544 
545 
564 U_STABLE void U_EXPORT2
565 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
566 
567 
588 U_STABLE const USet * U_EXPORT2
590 
591 
592 #if U_SHOW_CPLUSPLUS_API
593 
611 U_STABLE void U_EXPORT2
613 
614 
635 U_STABLE const icu::UnicodeSet * U_EXPORT2
637 #endif
638 
639 
666 U_STABLE int32_t U_EXPORT2
667 uspoof_check(const USpoofChecker *sc,
668  const UChar *id, int32_t length,
669  int32_t *position,
670  UErrorCode *status);
671 
672 
700 U_STABLE int32_t U_EXPORT2
702  const char *id, int32_t length,
703  int32_t *position,
704  UErrorCode *status);
705 
706 
707 #if U_SHOW_CPLUSPLUS_API
708 
731 U_STABLE int32_t U_EXPORT2
733  const icu::UnicodeString &id,
734  int32_t *position,
735  UErrorCode *status);
736 
737 #endif
738 
739 
779 U_STABLE int32_t U_EXPORT2
781  const UChar *id1, int32_t length1,
782  const UChar *id2, int32_t length2,
783  UErrorCode *status);
784 
785 
786 
812 U_STABLE int32_t U_EXPORT2
814  const char *id1, int32_t length1,
815  const char *id2, int32_t length2,
816  UErrorCode *status);
817 
818 
819 
820 
821 #if U_SHOW_CPLUSPLUS_API
822 
843 U_STABLE int32_t U_EXPORT2
845  const icu::UnicodeString &s1,
846  const icu::UnicodeString &s2,
847  UErrorCode *status);
848 #endif
849 
850 
883 U_STABLE int32_t U_EXPORT2
885  uint32_t type,
886  const UChar *id, int32_t length,
887  UChar *dest, int32_t destCapacity,
888  UErrorCode *status);
889 
925 U_STABLE int32_t U_EXPORT2
927  uint32_t type,
928  const char *id, int32_t length,
929  char *dest, int32_t destCapacity,
930  UErrorCode *status);
931 
932 #if U_SHOW_CPLUSPLUS_API
933 
958 U_I18N_API icu::UnicodeString & U_EXPORT2
960  uint32_t type,
961  const icu::UnicodeString &id,
962  icu::UnicodeString &dest,
963  UErrorCode *status);
964 #endif /* U_SHOW_CPLUSPLUS_API */
965 
966 
967 #ifndef U_HIDE_DRAFT_API
968 
979 U_DRAFT const USet * U_EXPORT2
981 
993 U_DRAFT const USet * U_EXPORT2
995 
996 #if U_SHOW_CPLUSPLUS_API
997 
1009 U_DRAFT const icu::UnicodeSet * U_EXPORT2
1011 
1023 U_DRAFT const icu::UnicodeSet * U_EXPORT2
1025 
1026 #endif /* U_SHOW_CPLUSPLUS_API */
1027 #endif /* U_HIDE_DRAFT_API */
1028 
1051 U_STABLE int32_t U_EXPORT2
1053  void *data, int32_t capacity,
1054  UErrorCode *status);
1055 
1056 
1057 #endif
1058 
1059 #endif /* USPOOF_H */
const USet * uspoof_getInclusionSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in Unicode UAX #31...
const icu::UnicodeSet * uspoof_getInclusionUnicodeSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in Unicode UAX #31...
int32_t uspoof_checkUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
void uspoof_close(USpoofChecker *sc)
Close a Spoof Checker, freeing any memory that was being held by its implementation.
URestrictionLevel
Constants from UAX #39 for use in setRestrictionLevel(), and for returned identifier restriction leve...
Definition: uspoof.h:271
Check an identifier for the presence of invisible characters, such as zero-width spaces, or character sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.
Definition: uspoof.h:221
Check that an identifier contains only characters from a single script (plus chars from the common an...
Definition: uspoof.h:212
void uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status)
Specify the set of checks that will be performed by the check functions of this Spoof Checker...
USpoofChecker * uspoof_clone(const USpoofChecker *sc, UErrorCode *status)
Clone a Spoof Checker.
Only ASCII characters: U+0000..U+007F.
Definition: uspoof.h:277
Check that an identifier contains only characters from a specified set of acceptable characters...
Definition: uspoof.h:227
Allow arbitrary mixtures of scripts.
Definition: uspoof.h:297
USpoofChecks
Enum for the kinds of checks that USpoofChecker can perform.
Definition: uspoof.h:154
C++ API: Unicode String.
USpoofChecker * uspoof_open(UErrorCode *status)
Create a Unicode Spoof Checker, configured to perform all checks except for USPOOF_LOCALE_LIMIT and U...
int32_t uspoof_check(const USpoofChecker *sc, const UChar *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
void uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status)
Limit characters that are acceptable in identifiers being checked to those normally used with the lan...
int32_t uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status)
Get the set of checks that this Spoof Checker has been configured to perform.
USpoofChecker * uspoof_openFromSource(const char *confusables, int32_t confusablesLen, const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, int32_t *errType, UParseError *pe, UErrorCode *status)
Open a Spoof Checker from the source form of the spoof data.
URestrictionLevel uspoof_getRestrictionLevel(const USpoofChecker *sc)
Get the Restriction Level that will be tested if the checks include RESTRICTION_LEVEL.
const icu::UnicodeSet * uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status)
Get a UnicodeSet for the characters permitted in an identifier.
C API: Unicode Set.
Enable all spoof checks.
Definition: uspoof.h:244
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:358
const icu::UnicodeSet * uspoof_getRecommendedUnicodeSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in Unicod...
int32_t uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &s1, const icu::UnicodeString &s2, UErrorCode *status)
Check whether two specified strings are visually confusable.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
Any Case Modifier for confusable identifier tests.
Definition: uspoof.h:189
int32_t uspoof_checkUTF8(const USpoofChecker *sc, const char *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
Whole script confusable test.
Definition: uspoof.h:181
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:308
U_I18N_API icu::UnicodeString & uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, uint32_t type, const icu::UnicodeString &id, icu::UnicodeString &dest, UErrorCode *status)
Get the "skeleton" for an identifier.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
int32_t uspoof_getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *id, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
Allow Latin with other scripts except Cyrillic, Greek, Cherokee. Otherwise, the same as Highly Restric...
Definition: uspoof.h:291
Check that an identifier does not include decimal digits from more than one numbering system...
Definition: uspoof.h:236
void uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
All characters in each identifier must be from a single script, or from the combinations: Latin + Han...
Definition: uspoof.h:285
const USet * uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status)
Get a USet for the characters permitted in an identifier.
USpoofChecker * uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode)
Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
int32_t uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, int32_t length, char *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:276
struct USpoofChecker USpoofChecker
typedef for C of USpoofChecker
Definition: uspoof.h:145
int32_t uspoof_serialize(USpoofChecker *sc, void *data, int32_t capacity, UErrorCode *status)
Serialize the data for a spoof detector into a chunk of memory.
"Smart pointer" class, closes a USpoofChecker via uspoof_close().
void uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel)
Set the loosest restriction level allowed.
Single script confusable test.
Definition: uspoof.h:160
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:278
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
struct USet USet
Definition: ucnv.h:67
C API: Parse Error Information.
int32_t uspoof_areConfusableUTF8(const USpoofChecker *sc, const char *id1, int32_t length1, const char *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
const char * uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status)
Get a list of locales for the scripts that are acceptable in strings to be checked.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
Mixed script confusable test.
Definition: uspoof.h:170
Enable the return of auxiliary (non-error) information in the upper bits of the check results value...
Definition: uspoof.h:259
Check that an identifier is no looser than the specified RestrictionLevel.
Definition: uspoof.h:204
void uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
const USet * uspoof_getRecommendedSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in Unicod...
Any valid identifiers, including characters outside of the Identifier Profile.
Definition: uspoof.h:303
int32_t uspoof_areConfusable(const USpoofChecker *sc, const UChar *id1, int32_t length1, const UChar *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:245
#define U_DRAFT
This is used to declare a function as a draft public ICU C API.
Definition: umachine.h:111
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
C++ API: Unicode Set.