gccrs: Add Unicode check for crate_name attributes
gcc/rust/ChangeLog: * lex/rust-codepoint.h: Add comment * lex/rust-lex.h: New method to get decoded characters * rust-session-manager.cc (validate_crate_name): Modify unicode check (rust_crate_name_validation_test): Add testcases * util/rust-unicode.h (RUST_UNICODE_H): New class Utf8String. (class Utf8String): New class. * util/rust-unicode.cc (binary_search_sorted_array): Add comment. (recursive_decomp_cano): Add comment. (recomp): Remove dead code. (dump_string): Removed. gcc/testsuite/ChangeLog: * rust/compile/bad-crate-name.rs: Moved to... * rust/compile/bad-crate-name1.rs: ...here. * rust/compile/bad-crate-name2.rs: New test. Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
This commit is contained in:
parent
884dec3a42
commit
f7b2e17682
7 changed files with 59 additions and 29 deletions
|
@ -22,6 +22,8 @@
|
|||
#include "rust-system.h"
|
||||
|
||||
namespace Rust {
|
||||
|
||||
// FIXME: move this to rust-unicode.h?
|
||||
struct Codepoint
|
||||
{
|
||||
uint32_t value;
|
||||
|
|
|
@ -334,6 +334,14 @@ public:
|
|||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of the decoded characters held in `chars` when is_valid ()
// reports success, and tl::nullopt otherwise.
// NOTE(review): is_valid () is defined outside this hunk; presumably it
// reports whether decoding the input succeeded -- confirm.
tl::optional<std::vector<Codepoint>> get_chars ()
|
||||
{
|
||||
if (is_valid ())
|
||||
return {chars};
|
||||
else
|
||||
return tl::nullopt;
|
||||
}
|
||||
};
|
||||
|
||||
class FileInputSource : public InputSource
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include "rust-early-name-resolver.h"
|
||||
#include "rust-cfg-strip.h"
|
||||
#include "rust-expand-visitor.h"
|
||||
#include "rust-unicode.h"
|
||||
|
||||
#include "diagnostic.h"
|
||||
#include "input.h"
|
||||
|
@ -107,30 +108,39 @@ infer_crate_name (const std::string &filename)
|
|||
return crate;
|
||||
}
|
||||
|
||||
/* Validate the crate name using the ASCII rules
|
||||
TODO: Support Unicode version of the rules */
|
||||
/* Validate the crate name using the Unicode rules */
|
||||
|
||||
static bool
|
||||
validate_crate_name (const std::string &crate_name, Error &error)
|
||||
{
|
||||
if (crate_name.empty ())
|
||||
Utf8String utf8_name = {crate_name};
|
||||
tl::optional<std::vector<Codepoint>> uchars_opt = utf8_name.get_chars ();
|
||||
|
||||
if (!uchars_opt.has_value ())
|
||||
{
|
||||
error = Error (UNDEF_LOCATION, "crate name is not a valid UTF-8 string");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<Codepoint> uchars = uchars_opt.value ();
|
||||
if (uchars.empty ())
|
||||
{
|
||||
error = Error (UNDEF_LOCATION, "crate name cannot be empty");
|
||||
return false;
|
||||
}
|
||||
if (crate_name.length () > kMaxNameLength)
|
||||
if (uchars.size () > kMaxNameLength)
|
||||
{
|
||||
error = Error (UNDEF_LOCATION, "crate name cannot exceed %lu characters",
|
||||
(unsigned long) kMaxNameLength);
|
||||
return false;
|
||||
}
|
||||
for (auto &c : crate_name)
|
||||
for (Codepoint &c : uchars)
|
||||
{
|
||||
if (!(ISALNUM (c) || c == '_'))
|
||||
if (!(is_alphabetic (c.value) || is_numeric (c.value) || c.value == '_'))
|
||||
{
|
||||
error = Error (UNDEF_LOCATION,
|
||||
"invalid character %<%c%> in crate name: %<%s%>", c,
|
||||
crate_name.c_str ());
|
||||
"invalid character %<%s%> in crate name: %<%s%>",
|
||||
c.as_string ().c_str (), crate_name.c_str ());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1273,13 +1283,17 @@ rust_crate_name_validation_test (void)
|
|||
ASSERT_TRUE (Rust::validate_crate_name ("example", error));
|
||||
ASSERT_TRUE (Rust::validate_crate_name ("abcdefg_1234", error));
|
||||
ASSERT_TRUE (Rust::validate_crate_name ("1", error));
|
||||
// FIXME: The next test does not pass as of current implementation
|
||||
// ASSERT_TRUE (Rust::CompileOptions::validate_crate_name ("惊吓"));
|
||||
ASSERT_TRUE (Rust::validate_crate_name ("クレート", error));
|
||||
ASSERT_TRUE (Rust::validate_crate_name ("Sōkrátēs", error));
|
||||
ASSERT_TRUE (Rust::validate_crate_name ("惊吓", error));
|
||||
|
||||
// NOTE: - is not allowed in the crate name ...
|
||||
|
||||
ASSERT_FALSE (Rust::validate_crate_name ("abcdefg-1234", error));
|
||||
ASSERT_FALSE (Rust::validate_crate_name ("a+b", error));
|
||||
ASSERT_FALSE (Rust::validate_crate_name ("/a+b/", error));
|
||||
ASSERT_FALSE (Rust::validate_crate_name ("😸++", error));
|
||||
ASSERT_FALSE (Rust::validate_crate_name ("∀", error));
|
||||
|
||||
/* Tests for crate name inference */
|
||||
ASSERT_EQ (Rust::infer_crate_name ("c.rs"), "c");
|
||||
|
|
|
@ -12,6 +12,7 @@ typedef std::vector<codepoint_t> string_t;
|
|||
template <std::size_t SIZE>
|
||||
int64_t
|
||||
binary_search_ranges (
|
||||
// FIXME: use binary search function from <algorithm>
|
||||
const std::array<std::pair<uint32_t, uint32_t>, SIZE> &ranges,
|
||||
uint32_t target_cp)
|
||||
{
|
||||
|
@ -49,6 +50,7 @@ int64_t
|
|||
binary_search_sorted_array (const std::array<uint32_t, SIZE> &array,
|
||||
uint32_t target)
|
||||
{
|
||||
// FIXME: use binary search function from <algorithm>
|
||||
if (SIZE == 0)
|
||||
return -1;
|
||||
|
||||
|
@ -104,9 +106,7 @@ recursive_decomp_cano (codepoint_t c, string_t &buf)
|
|||
{
|
||||
string_t decomped = it->second;
|
||||
for (codepoint_t cp : decomped)
|
||||
{
|
||||
recursive_decomp_cano (cp, buf);
|
||||
}
|
||||
recursive_decomp_cano (cp, buf);
|
||||
}
|
||||
else
|
||||
buf.push_back (c);
|
||||
|
@ -152,8 +152,7 @@ recomp (string_t s)
|
|||
if (s.size () > 0)
|
||||
{
|
||||
int last_class = -1;
|
||||
// int starter_pos = 0; // Assume the first character is Starter. Correct?
|
||||
// int target_pos = 1;
|
||||
// Assume the first character is Starter.
|
||||
codepoint_t starter_ch = s[0];
|
||||
for (unsigned int src_pos = 1; src_pos < s.size (); src_pos++)
|
||||
{
|
||||
|
@ -189,20 +188,6 @@ recomp (string_t s)
|
|||
return buf;
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
/*
|
||||
void
|
||||
dump_string (std::vector<uint32_t> s)
|
||||
{
|
||||
std::cout << "dump=";
|
||||
for (auto c : s)
|
||||
{
|
||||
std::cout << std::hex << c << ", ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
*/
|
||||
|
||||
string_t
|
||||
nfc_normalize (string_t s)
|
||||
{
|
||||
|
|
|
@ -19,10 +19,29 @@
|
|||
#ifndef RUST_UNICODE_H
|
||||
#define RUST_UNICODE_H
|
||||
|
||||
#include "optional.h"
|
||||
#include "rust-system.h"
|
||||
#include "rust-lex.h"
|
||||
|
||||
namespace Rust {
|
||||
|
||||
// Wraps a std::string that may or may not be valid UTF-8 and exposes its
// contents as a sequence of Codepoints.  The string is decoded once, in the
// constructor; get_chars () only hands back the stored result.
class Utf8String
|
||||
{
|
||||
private:
|
||||
// Result of decoding the constructor argument; holds whatever
// BufferInputSource::get_chars () returned for it.
tl::optional<std::vector<Codepoint>> chars;
|
||||
|
||||
public:
|
||||
// Decodes MAYBE_UTF8 by feeding it through a Lexer::BufferInputSource and
// stores the outcome in `chars`.
// NOTE(review): the second initializer (0) is presumably the starting
// offset into the buffer -- confirm against BufferInputSource.
Utf8String (const std::string &maybe_utf8)
|
||||
{
|
||||
Lexer::BufferInputSource input_source = {maybe_utf8, 0};
|
||||
chars = input_source.get_chars ();
|
||||
}
|
||||
|
||||
// Returns UTF codepoints when string is valid as UTF-8, returns nullopt
|
||||
// otherwise.
|
||||
// The optional (and the vector inside it) is returned by value, i.e. copied.
tl::optional<std::vector<Codepoint>> get_chars () const { return chars; }
|
||||
};
|
||||
|
||||
// TODO: add function nfc_normalize
|
||||
|
||||
bool
|
||||
|
|
2
gcc/testsuite/rust/compile/bad-crate-name2.rs
Normal file
2
gcc/testsuite/rust/compile/bad-crate-name2.rs
Normal file
|
@ -0,0 +1,2 @@
|
|||
#![crate_name = "😅"] // { dg-error "invalid character ...." "" }
|
||||
fn main() {}
|
Loading…
Add table
Reference in a new issue