Browse Source

Disallow non-utf8 chars as input to phonenumbermatcher API (#2707)

pull/2716/head
penmetsaa 4 years ago
committed by GitHub
parent
commit
3285ff2a40
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 43 additions and 4 deletions
  1. +18
    -3
      cpp/src/phonenumbers/phonenumbermatcher.cc
  2. +8
    -1
      cpp/src/phonenumbers/phonenumbermatcher.h
  3. +17
    -0
      cpp/test/phonenumbers/phonenumbermatcher_test.cc

+ 18
- 3
cpp/src/phonenumbers/phonenumbermatcher.cc View File

@ -33,7 +33,6 @@
#include <string>
#include <utility>
#include <vector>
#include <unicode/uchar.h>
#include "phonenumbers/alternate_format.h"
@ -52,6 +51,7 @@
#include "phonenumbers/regexp_adapter_icu.h"
#include "phonenumbers/regexp_cache.h"
#include "phonenumbers/stringutil.h"
#include "phonenumbers/utf/unicodetext.h"
#ifdef I18N_PHONENUMBERS_USE_RE2
#include "phonenumbers/regexp_adapter_re2.h"
@ -407,7 +407,9 @@ PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util,
max_tries_(max_tries),
state_(NOT_READY),
last_match_(NULL),
search_index_(0) {
search_index_(0),
is_input_valid_utf8_(true) {
is_input_valid_utf8_ = IsInputUtf8();
}
PhoneNumberMatcher::PhoneNumberMatcher(const string& text,
@ -421,12 +423,20 @@ PhoneNumberMatcher::PhoneNumberMatcher(const string& text,
max_tries_(numeric_limits<int>::max()),
state_(NOT_READY),
last_match_(NULL),
search_index_(0) {
search_index_(0),
is_input_valid_utf8_(true) {
is_input_valid_utf8_ = IsInputUtf8();
}
// Destructor. The body is intentionally empty: no explicit cleanup is
// performed here.
PhoneNumberMatcher::~PhoneNumberMatcher() {}
bool PhoneNumberMatcher::IsInputUtf8() {
UnicodeText number_as_unicode;
number_as_unicode.PointToUTF8(text_.c_str(), text_.size());
return number_as_unicode.UTF8WasValid();
}
// static
bool PhoneNumberMatcher::IsLatinLetter(char32 letter) {
// Combining marks are a subset of non-spacing-mark.
@ -626,6 +636,11 @@ bool PhoneNumberMatcher::ExtractMatch(const string& candidate, int offset,
}
bool PhoneNumberMatcher::HasNext() {
// Input should contain only UTF-8 characters.
if (!is_input_valid_utf8_) {
state_ = DONE;
return false;
}
if (state_ == NOT_READY) {
PhoneNumberMatch temp_match;
if (!Find(search_index_, &temp_match)) {


+ 8
- 1
cpp/src/phonenumbers/phonenumbermatcher.h View File

@ -87,7 +87,8 @@ class PhoneNumberMatcher {
~PhoneNumberMatcher();
// Returns true if the text sequence has another match.
// Returns true if the text sequence has another match, false otherwise.
// Always returns false when input contains non UTF-8 characters.
bool HasNext();
// Gets next match from text sequence.
@ -101,6 +102,9 @@ class PhoneNumberMatcher {
DONE,
};
// Checks whether the provided text_ is valid UTF-8.
bool IsInputUtf8();
// Attempts to extract a match from a candidate string. Returns true if a
// match is found, otherwise returns false. The value "offset" refers to the
// start index of the candidate string within the overall text.
@ -202,6 +206,9 @@ class PhoneNumberMatcher {
// The next index to start searching at. Undefined in State.DONE.
int search_index_;
// True if the input text is valid UTF-8; set in the constructor from
// IsInputUtf8() and checked by HasNext().
bool is_input_valid_utf8_;
DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher);
};


+ 17
- 0
cpp/test/phonenumbers/phonenumbermatcher_test.cc View File

@ -1078,6 +1078,23 @@ TEST_F(PhoneNumberMatcherTest, NoMatchIfNoNumber) {
EXPECT_FALSE(matcher->HasNext());
}
// Input that is not valid UTF-8 must yield no matches rather than an error.
TEST_F(PhoneNumberMatcherTest, NoErrorWithSpecialCharacters) {
  // The "\356" byte followed by a plain ASCII 'e' is an incomplete multi-byte
  // sequence, which makes the whole text invalid UTF-8.
  const string fuzz_fragment =
      "Myfuzzvar1152: \"My info:%415-666-7777 123 fake street\"\nfuzzvar1155: "
      "47\nfuzzvar1158: %415-666-1234 "
      "i18n_phonenumbers_Pho\356eNumberMatcher_Leniency_VALID_1"
      "\nfuzzvar1159: 20316 info:%415-666-7777 123 fake str79ee\nt";

  // Repeat the fragment 100 times to build the text handed to the matcher.
  string repeated_text;
  for (int copy = 0; copy < 100; ++copy) {
    repeated_text += fuzz_fragment;
  }

  scoped_ptr<PhoneNumberMatcher> matcher(GetMatcherWithLeniency(
      repeated_text, RegionCode::US(), PhoneNumberMatcher::POSSIBLE));

  // The text contains invalid UTF-8, so the matcher reports no matches at
  // all, even though the text contains phone-number-like substrings.
  EXPECT_FALSE(matcher->HasNext());
}
TEST_F(PhoneNumberMatcherTest, Sequences) {
// Test multiple occurrences.
const string text = "Call 033316005 or 032316005!";


Loading…
Cancel
Save