diff --git a/third_party/blink/renderer/platform/BUILD.gn b/third_party/blink/renderer/platform/BUILD.gn
index 2c079c06c5..34a844659b 100644
--- a/third_party/blink/renderer/platform/BUILD.gn
+++ b/third_party/blink/renderer/platform/BUILD.gn
@@ -118,10 +118,7 @@ blink_python_runner("color_data") {
compiled_action("character_data") {
tool = ":character_data_generator"
- outputs = [
- "$blink_platform_output_dir/character_property_data.cc",
- "$blink_platform_output_dir/text/break_iterator_data_inline_header.h",
- ]
+ outputs = [ "$blink_platform_output_dir/character_property_data.cc" ]
args = rebase_path(outputs, root_build_dir)
}
diff --git a/third_party/blink/renderer/platform/runtime_enabled_features.json5 b/third_party/blink/renderer/platform/runtime_enabled_features.json5
index 28d34020f7..45942f4e89 100644
--- a/third_party/blink/renderer/platform/runtime_enabled_features.json5
+++ b/third_party/blink/renderer/platform/runtime_enabled_features.json5
@@ -509,11 +509,6 @@
public: true,
status: "experimental",
},
- {
- // crbug.com/41485013
- name: "BreakIteratorDataGenerator",
- status: "stable",
- },
{
name: "BrowserVerifiedUserActivationKeyboard",
base_feature: "none",
diff --git a/third_party/blink/renderer/platform/text/character_property_data_generator.cc b/third_party/blink/renderer/platform/text/character_property_data_generator.cc
index 70101dbf76..27e6852647 100644
--- a/third_party/blink/renderer/platform/text/character_property_data_generator.cc
+++ b/third_party/blink/renderer/platform/text/character_property_data_generator.cc
@@ -5,19 +5,13 @@
#include "third_party/blink/renderer/platform/text/character_property_data.h"
#include <stdio.h>
-#include <unicode/brkiter.h>
-#include <unicode/locid.h>
#include <unicode/ucptrie.h>
-#include <unicode/udata.h>
-#include <unicode/ulocdata.h>
#include <unicode/umutablecptrie.h>
#include <unicode/uniset.h>
#include <unicode/unistr.h>
#include <cassert>
#include <cstring>
-#include <filesystem>
-#include <fstream>
#include <iterator>
#include <memory>
@@ -31,36 +25,6 @@
namespace blink {
namespace {
-#define CHECK_U_ERROR(error, name) \
- CHECK(U_SUCCESS(error)) << name << ": (" << error << ")" << u_errorName(error)
-
-//
-// Load the ICU data file and set it to the ICU.
-//
-void InitializeIcu(const char* exec_path) {
- // ICU can't load the data file by itself because ICU tries to load the
- // versioned data file (e.g., "icudt73l.dat"), while the Chromium build system
- // creates the unversioned data file (e.g., "icudtl.dat").
- std::filesystem::path path{exec_path};
- path = path.parent_path() / "icudt" U_ICUDATA_TYPE_LETTER ".dat";
-
- std::ifstream data_ifstream(path, std::ios_base::binary);
- CHECK(data_ifstream.is_open());
- static std::vector<uint8_t> icu_data;
- CHECK(icu_data.empty());
- std::copy(std::istreambuf_iterator<char>(data_ifstream),
- std::istreambuf_iterator<char>(), std::back_inserter(icu_data));
- UErrorCode error = U_ZERO_ERROR;
- udata_setCommonData(icu_data.data(), &error);
- CHECK_U_ERROR(error, "udata_setCommonData");
-
- // Check ICU functions that need the data resources are working.
- // https://unicode-org.github.io/icu/userguide/icu/design.html#icu4c-initialization-and-termination
- UVersionInfo version;
- ulocdata_getCLDRVersion(version, &error);
- CHECK_U_ERROR(error, "ulocdata_getCLDRVersion");
-}
-
class CharacterPropertyValues {
public:
constexpr static UChar32 kMaxCodepoint = 0x10FFFF;
@@ -255,201 +219,18 @@ static void GenerateCharacterPropertyData(FILE* fp) {
GenerateUTrieSerialized(fp, serialized_size, serialized);
}
-//
-// Generate a line break pair table in `break_iterator_data_inline_header.h`.
-//
-// See [UAX14](https://unicode.org/reports/tr14/).
-//
-class LineBreakData {
- public:
- LineBreakData() = default;
-
- static void Generate(FILE* fp) {
- LineBreakData data;
- data.FillFromIcu();
- data.FillAscii();
- data.Print(fp);
- }
-
- private:
- // Fill the pair table from the ICU BreakIterator.
- void FillFromIcu() {
- UErrorCode status = U_ZERO_ERROR;
- const icu::Locale locale("en");
- icu::BreakIterator* break_iterator =
- icu::BreakIterator::createLineInstance(locale, status);
- CHECK_U_ERROR(status, "createLineInstance");
-
- for (UChar ch = kMinChar; ch <= kMaxChar; ++ch) {
- const icu::UnicodeString ch_str(ch);
- for (UChar ch_next = kMinChar; ch_next <= kMaxChar; ++ch_next) {
- const icu::UnicodeString ch_next_str(ch_next);
- const icu::UnicodeString str = ch_str + ch_next_str;
- break_iterator->setText(str);
- SetPairValue(ch, ch_next, break_iterator->isBoundary(1));
- }
- }
- }
-
- // Line breaking table for printable ASCII characters. Line breaking
- // opportunities in this table are as below:
- // - before opening punctuations such as '(', '<', '[', '{' after certain
- // characters (compatible with Firefox 3.6);
- // - after '-' and '?' (backward-compatible, and compatible with Internet
- // Explorer).
- // Please refer to <https://bugs.webkit.org/show_bug.cgi?id=37698> for line
- // breaking matrixes of different browsers and the ICU standard.
- void FillAscii() {
-#define ALL_CHAR '!', 0x7F
- SetPairValue(ALL_CHAR, ALL_CHAR, false);
- SetPairValue(ALL_CHAR, '(', '(', true);
- SetPairValue(ALL_CHAR, '<', '<', true);
- SetPairValue(ALL_CHAR, '[', '[', true);
- SetPairValue(ALL_CHAR, '{', '{', true);
- SetPairValue('-', '-', ALL_CHAR, true);
- SetPairValue('?', '?', ALL_CHAR, true);
- SetPairValue('-', '-', '$', '$', false);
- SetPairValue(ALL_CHAR, '!', '!', false);
- SetPairValue('?', '?', '"', '"', false);
- SetPairValue('?', '?', '\'', '\'', false);
- SetPairValue(ALL_CHAR, ')', ')', false);
- SetPairValue(ALL_CHAR, ',', ',', false);
- SetPairValue(ALL_CHAR, '.', '.', false);
- SetPairValue(ALL_CHAR, '/', '/', false);
- // Note: Between '-' and '[0-9]' is hard-coded in `ShouldBreakFast()`.
- SetPairValue('-', '-', '0', '9', false);
- SetPairValue(ALL_CHAR, ':', ':', false);
- SetPairValue(ALL_CHAR, ';', ';', false);
- SetPairValue(ALL_CHAR, '?', '?', false);
- SetPairValue(ALL_CHAR, ']', ']', false);
- SetPairValue(ALL_CHAR, '}', '}', false);
- SetPairValue('$', '$', ALL_CHAR, false);
- SetPairValue('\'', '\'', ALL_CHAR, false);
- SetPairValue('(', '(', ALL_CHAR, false);
- SetPairValue('/', '/', ALL_CHAR, false);
- SetPairValue('0', '9', ALL_CHAR, false);
- SetPairValue('<', '<', ALL_CHAR, false);
- SetPairValue('@', '@', ALL_CHAR, false);
- SetPairValue('A', 'Z', ALL_CHAR, false);
- SetPairValue('[', '[', ALL_CHAR, false);
- SetPairValue('^', '`', ALL_CHAR, false);
- SetPairValue('a', 'z', ALL_CHAR, false);
- SetPairValue('{', '{', ALL_CHAR, false);
- SetPairValue(0x7F, 0x7F, ALL_CHAR, false);
-#undef ALL_CHAR
- }
-
- // Print the C++ source code.
- void Print(FILE* fp) {
- // Print file headers.
- fprintf(fp,
- "#include <cstdint>\n"
- "#include "
- "\"third_party/blink/renderer/platform/wtf/text/wtf_uchar.h\"\n"
- "\nnamespace {\n\n");
-
- fprintf(fp, "constexpr UChar kFastLineBreakMinChar = 0x%02X;\n", kMinChar);
- fprintf(fp, "constexpr UChar kFastLineBreakMaxChar = 0x%02X;\n", kMaxChar);
-
- // Define macros.
- fprintf(fp,
- "\n#define B(a, b, c, d, e, f, g, h)"
- " ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3) |"
- " ((e) << 4) | ((f) << 5) | ((g) << 6) | ((h) << 7))\n\n");
-
- fprintf(fp, "const uint8_t kFastLineBreakTable[%d][%d] = {\n", kNumChars,
- kNumCharsRoundUp8 / 8);
-
- // Print the column comment.
- fprintf(fp, " /*");
- for (UChar ch = kMinChar; ch <= kMaxChar; ++ch) {
- if (ch != kMinChar && (ch - kMinChar) % 8 == 0) {
- fprintf(fp, " ");
- }
- fprintf(fp, ch < 0x7F ? " %c" : "%02X", ch);
- }
- fprintf(fp, " */\n");
-
- // Print the data array.
- for (int y = 0; y < kNumChars; ++y) {
- const UChar ch = y + kMinChar;
- fprintf(fp, "/* %02X %c */ {B(", ch, ch < 0x7F ? ch : ' ');
- const char* prefix = "";
- for (int x = 0; x < kNumCharsRoundUp8; ++x) {
- fprintf(fp, "%s%d", prefix, pair_[y][x]);
- prefix = (x % 8 == 7) ? "),B(" : ",";
- }
- fprintf(fp, ")},\n");
- }
- fprintf(fp,
- "};\n\n"
- "#undef B\n\n"
- "template <typename T>\n"
- "inline uint8_t GetFastLineBreak(T ch1, T ch2) {\n"
- " const T i2 = ch2 - kFastLineBreakMinChar;\n"
- " return kFastLineBreakTable[ch1 - kFastLineBreakMinChar]"
- "[i2 / 8] & (1 << (i2 %% 8));\n"
- "}\n\n"
- "} // namespace\n");
- }
-
- void SetPairValue(UChar ch1_min,
- UChar ch1_max,
- UChar ch2_min,
- UChar ch2_max,
- bool value) {
- for (UChar ch1 = ch1_min; ch1 <= ch1_max; ++ch1) {
- for (UChar ch2 = ch2_min; ch2 <= ch2_max; ++ch2) {
- SetPairValue(ch1, ch2, value);
- }
- }
- }
-
- // Set the breakability between `ch1` and `ch2`.
- void SetPairValue(UChar ch1, UChar ch2, bool value) {
- CHECK_GE(ch1, kMinChar);
- CHECK_LE(ch1, kMaxChar);
- CHECK_GE(ch2, kMinChar);
- CHECK_LE(ch2, kMaxChar);
- pair_[ch1 - kMinChar][ch2 - kMinChar] = value;
- }
-
- constexpr static UChar kMinChar = '!';
- constexpr static UChar kMaxChar = 0xFF;
- constexpr static int kNumChars = kMaxChar - kMinChar + 1;
- constexpr static int kNumCharsRoundUp8 = (kNumChars + 7) / 8 * 8;
- bool pair_[kNumChars][kNumCharsRoundUp8]{};
-};
-
-void InvokeGenerator(int index,
- int argc,
- char** argv,
- void (*generator)(FILE*)) {
- if (index >= argc) {
- return;
- }
- const char* path = argv[index];
- if (!*path) {
- return;
- }
-
- if (strcmp(path, "-") == 0) {
- (*generator)(stdout);
- return;
- }
-
- FILE* fp = fopen(path, "wb");
- (*generator)(fp);
- fclose(fp);
-}
-
} // namespace
} // namespace blink
int main(int argc, char** argv) {
- blink::InitializeIcu(argv[0]);
- blink::InvokeGenerator(1, argc, argv, blink::GenerateCharacterPropertyData);
- blink::InvokeGenerator(2, argc, argv, blink::LineBreakData::Generate);
+  // Write the serialized array to argv[1], or to stdout when no path is given.
+  FILE* fp = argc <= 1 ? stdout : fopen(argv[1], "wb");
+  if (!fp) {
+    perror(argv[1]);  // Report why the output could not be opened.
+    return 1;         // Fail the build step instead of writing through null.
+  }
+  blink::GenerateCharacterPropertyData(fp);
+  if (fp != stdout && fclose(fp) != 0) return 1;  // Flush/close failed.
return 0;
}
diff --git a/third_party/blink/renderer/platform/text/text_break_iterator.cc b/third_party/blink/renderer/platform/text/text_break_iterator.cc
index 650eb760b2..14de47d9fe 100644
--- a/third_party/blink/renderer/platform/text/text_break_iterator.cc
+++ b/third_party/blink/renderer/platform/text/text_break_iterator.cc
@@ -26,8 +26,6 @@
#include <unicode/uchar.h>
#include <unicode/uvernum.h>
-#include "third_party/blink/renderer/platform/runtime_enabled_features.h"
-#include "third_party/blink/renderer/platform/text/break_iterator_data_inline_header.h"
#include "third_party/blink/renderer/platform/wtf/std_lib_extras.h"
#include "third_party/blink/renderer/platform/wtf/text/ascii_ctype.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
@@ -113,9 +111,6 @@ static const UChar kAsciiLineBreakTableLastChar = 127;
#define F 0xFF
-// Check if the generated table match the `kAsciiLineBreakTable` table.
-#define CHECK_ASCII_LINE_BRAEK_TABLE 0
-
// Line breaking table for printable ASCII characters. Line breaking
// opportunities in this table are as below:
// - before opening punctuations such as '(', '<', '[', '{' after certain
@@ -166,24 +161,6 @@ static const unsigned char kAsciiLineBreakTable[][(kAsciiLineBreakTableLastChar
};
// clang-format on
-#if CHECK_ASCII_LINE_BRAEK_TABLE
-void CheckAsciiLineBreakTable() {
- for (UChar ch2 = kAsciiLineBreakTableFirstChar;
- ch2 <= kAsciiLineBreakTableLastChar; ++ch2) {
- for (UChar ch1 = kAsciiLineBreakTableFirstChar;
- ch1 <= kAsciiLineBreakTableLastChar; ++ch1) {
- const UChar i2 = ch2 - kAsciiLineBreakTableFirstChar;
- const bool ascii =
- kAsciiLineBreakTable[ch1 - kAsciiLineBreakTableFirstChar][i2 / 8] &
- (1 << (i2 % 8));
- const bool fast = GetFastLineBreak(ch1, ch2);
- CHECK_EQ(ascii, fast)
- << String::Format("%02X/%02X (%c/%c)", ch1, ch2, ch1, ch2);
- }
- }
-}
-#endif // CHECK_ASCII_LINE_BRAEK_TABLE
-
#define BA_LB_COUNT U_LB_COUNT
// Line breaking table for CSS word-break: break-all. This table differs from
// asciiLineBreakTable in:
@@ -298,13 +275,7 @@ static inline bool ShouldKeepAfterKeepAll(UChar last_ch,
}
inline bool NeedsLineBreakIterator(UChar ch) {
- if (UNLIKELY(!RuntimeEnabledFeatures::BreakIteratorDataGeneratorEnabled())) {
- return ch > kAsciiLineBreakTableLastChar && ch != kNoBreakSpaceCharacter;
- }
- static_assert(kFastLineBreakMaxChar >= kAsciiLineBreakTableLastChar);
- static_assert(kNoBreakSpaceCharacter <= kFastLineBreakMaxChar,
- "Include NBSP for the performance.");
- return ch > kFastLineBreakMaxChar;
+ return ch > kAsciiLineBreakTableLastChar && ch != kNoBreakSpaceCharacter;
}
template <typename CharacterType>
@@ -349,61 +320,31 @@ struct LazyLineBreakIterator::Context {
last = current;
}
- bool ShouldBreakFast(bool disable_soft_hyphen) const {
-#if CHECK_ASCII_LINE_BRAEK_TABLE
- DEFINE_STATIC_LOCAL(bool, is_check_done, (false));
- if (!is_check_done) {
- is_check_done = true;
- CheckAsciiLineBreakTable();
- LOG(INFO) << "CheckAsciiLineBreakTable() completed.";
- }
-#endif // CHECK_ASCII_LINE_BRAEK_TABLE
-
+ bool ShouldBreakFast() const {
const UChar last_ch = last.ch;
const UChar ch = current.ch;
- static_assert(kFastLineBreakMinChar == kAsciiLineBreakTableFirstChar);
- if (UNLIKELY(last_ch < kFastLineBreakMinChar ||
- ch < kFastLineBreakMinChar)) {
+ if (UNLIKELY(last_ch < kAsciiLineBreakTableFirstChar ||
+ ch < kAsciiLineBreakTableFirstChar)) {
return false;
}
// Don't allow line breaking between '-' and a digit if the '-' may mean a
// minus sign in the context, while allow breaking in 'ABCD-1234' and
// '1234-5678' which may be in long URLs.
- static_assert('-' >= kFastLineBreakMinChar);
+ static_assert('-' >= kAsciiLineBreakTableFirstChar);
if (last_ch == '-' && IsASCIIDigit(ch)) {
return IsASCIIAlphanumeric(last_last_ch);
}
- if (UNLIKELY(
- !RuntimeEnabledFeatures::BreakIteratorDataGeneratorEnabled())) {
- // If both `last_ch` and `ch` are ASCII characters, use a lookup table for
- // enhanced speed and for compatibility with other browsers (see comments
- // for asciiLineBreakTable for details).
- if (last_ch <= kAsciiLineBreakTableLastChar &&
- ch <= kAsciiLineBreakTableLastChar) {
- const unsigned char* table_row =
- kAsciiLineBreakTable[last_ch - kAsciiLineBreakTableFirstChar];
- int ch_index = ch - kAsciiLineBreakTableFirstChar;
- return table_row[ch_index / 8] & (1 << (ch_index % 8));
- }
-
- // Otherwise defer to the Unicode algorithm by returning false.
- return false;
- }
-
- // If both characters are in the fast line break table, use it for enhanced
- // speed. For ASCII characters, it is also for compatibility. The table is
- // generated at the build time, see the `LineBreakData` class.
- if (last_ch <= kFastLineBreakMaxChar && ch <= kFastLineBreakMaxChar) {
- if (!GetFastLineBreak(last_ch, ch)) {
- return false;
- }
- static_assert(kSoftHyphenCharacter <= kFastLineBreakMaxChar);
- if (UNLIKELY(disable_soft_hyphen && last_ch == kSoftHyphenCharacter)) {
- return false;
- }
- return true;
+ // If both `last_ch` and `ch` are ASCII characters, use a lookup table for
+ // enhanced speed and for compatibility with other browsers (see comments
+ // for `kAsciiLineBreakTable` for details).
+ if (last_ch <= kAsciiLineBreakTableLastChar &&
+ ch <= kAsciiLineBreakTableLastChar) {
+ const unsigned char* table_row =
+ kAsciiLineBreakTable[last_ch - kAsciiLineBreakTableFirstChar];
+ int ch_index = ch - kAsciiLineBreakTableFirstChar;
+ return table_row[ch_index / 8] & (1 << (ch_index % 8));
}
// Otherwise defer to the Unicode algorithm by returning false.
@@ -452,7 +393,7 @@ inline int LazyLineBreakIterator::NextBreakablePosition(
break;
}
- if (context.ShouldBreakFast(disable_soft_hyphen_)) {
+ if (context.ShouldBreakFast()) {
return i;
}