|
1 |
| -#[test] |
2 |
| -fn it_works() { |
| 1 | +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
| 2 | +// file at the top-level directory of this distribution and at |
| 3 | +// http://rust-lang.org/COPYRIGHT. |
| 4 | +// |
| 5 | +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 8 | +// option. This file may not be copied, modified, or distributed |
| 9 | +// except according to those terms. |
| 10 | + |
| 11 | +//! Determine displayed width of `char` and `str` types according to |
| 12 | +//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
| 13 | +//! rules. |
| 14 | +//! |
| 15 | +//! ```rust |
| 16 | +//! extern crate unicode_width; |
| 17 | +//! |
| 18 | +//! use unicode_width::UnicodeWidthStr; |
| 19 | +//! |
| 20 | +//! fn main() { |
| 21 | +//! let teststr = "Hello, world!"; |
| 22 | +//! let width = UnicodeWidthStr::width(teststr); |
| 23 | +//! println!("{}", teststr); |
| 24 | +//! println!("The above string is {} columns wide.", width); |
| 25 | +//! let width = teststr.width_cjk(); |
| 26 | +//! println!("The above string is {} columns wide (CJK).", width); |
| 27 | +//! } |
| 28 | +//! ``` |
| 29 | +//! |
| 30 | +//! # crates.io |
| 31 | +//! |
| 32 | +//! You can use this package in your project by adding the following |
| 33 | +//! to your `Cargo.toml`: |
| 34 | +//! |
| 35 | +//! ```toml |
| 36 | +//! [dependencies] |
| 37 | +//! unicode_width = "0.0.1" |
| 38 | +//! ``` |
| 39 | +
|
| 40 | +#![deny(missing_docs, unsafe_code)] |
| 41 | +#![feature(no_std, core)] |
| 42 | +#![no_std] |
| 43 | + |
| 44 | +extern crate core; |
| 45 | + |
| 46 | +#[cfg(test)] |
| 47 | +#[macro_use] |
| 48 | +extern crate std; |
| 49 | + |
| 50 | +use core::prelude::*; |
| 51 | + |
| 52 | +use tables::charwidth as cw; |
| 53 | +pub use tables::UNICODE_VERSION; |
| 54 | + |
| 55 | +mod tables; |
| 56 | + |
/// Methods for determining displayed width of Unicode characters.
pub trait UnicodeWidthChar {
    /// Returns the character's displayed width in columns, or `None` if the
    /// character is a control character other than `'\x00'`.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 1 column wide. This is consistent with the recommendations for
    /// non-CJK contexts, or when the context cannot be reliably determined.
    fn width(self) -> Option<usize>;

    /// Returns the character's displayed width in columns, or `None` if the
    /// character is a control character other than `'\x00'`.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 2 columns wide. This is consistent with the recommendations for
    /// CJK contexts.
    fn width_cjk(self) -> Option<usize>;
}
| 63 | + |
| 64 | +impl UnicodeWidthChar for char { |
| 65 | + /// Returns the character's displayed width in columns, or `None` if the |
| 66 | + /// character is a control character other than `'\x00'`. |
| 67 | + /// |
| 68 | + /// This function treats characters in the Ambiguous category according |
| 69 | + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
| 70 | + /// as 1 column wide. This is consistent with the recommendations for non-CJK |
| 71 | + /// contexts, or when the context cannot be reliably determined. |
| 72 | + fn width(self) -> Option<usize> { cw::width(self, false) } |
| 73 | + |
| 74 | + /// Returns the character's displayed width in columns, or `None` if the |
| 75 | + /// character is a control character other than `'\x00'`. |
| 76 | + /// |
| 77 | + /// This function treats characters in the Ambiguous category according |
| 78 | + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
| 79 | + /// as 2 columns wide. This is consistent with the recommendations for |
| 80 | + /// CJK contexts. |
| 81 | + fn width_cjk(self) -> Option<usize> { cw::width(self, true) } |
| 82 | +} |
| 83 | + |
/// Methods for determining displayed width of Unicode strings.
pub trait UnicodeWidthStr {
    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 1 column wide. This is consistent with the recommendations for
    /// non-CJK contexts, or when the context cannot be reliably determined.
    fn width(&self) -> usize;

    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 2 columns wide. This is consistent with the recommendations for
    /// CJK contexts.
    fn width_cjk(&self) -> usize;
}
| 90 | + |
| 91 | +impl UnicodeWidthStr for str { |
| 92 | + /// Returns the string's displayed width in columns. |
| 93 | + /// |
| 94 | + /// Control characters are treated as having zero width. |
| 95 | + /// |
| 96 | + /// This function treats characters in the Ambiguous category according |
| 97 | + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
| 98 | + /// as 1 column wide. This is consistent with the recommendations for |
| 99 | + /// non-CJK contexts, or when the context cannot be reliably determined. |
| 100 | + fn width(&self) -> usize { |
| 101 | + self.chars().map(|c| cw::width(c, false).unwrap_or(0)).sum() |
| 102 | + } |
| 103 | + |
| 104 | + /// Returns the string's displayed width in columns. |
| 105 | + /// |
| 106 | + /// Control characters are treated as having zero width. |
| 107 | + /// |
| 108 | + /// This function treats characters in the Ambiguous category according |
| 109 | + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
| 110 | + /// as 2 column wide. This is consistent with the recommendations for |
| 111 | + /// CJK contexts. |
| 112 | + fn width_cjk(&self) -> usize { |
| 113 | + self.chars().map(|c| cw::width(c, true).unwrap_or(0)).sum() |
| 114 | + } |
| 115 | +} |
| 116 | + |
#[cfg(test)]
mod tests {
    /// Checks string widths in both the default and the CJK interpretation.
    #[test]
    fn test_str() {
        use super::UnicodeWidthStr;

        // Fullwidth Latin letters h-e-l-l-o (U+FF48 U+FF45 U+FF4C U+FF4C
        // U+FF4F). They are spelled as escapes so that text normalization
        // cannot silently turn them into their 1-column ASCII look-alikes.
        let fullwidth_hello = "\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}";
        assert_eq!(UnicodeWidthStr::width(fullwidth_hello), 10);
        assert_eq!(fullwidth_hello.width_cjk(), 10);

        // Plain ASCII is narrow in both interpretations.
        assert_eq!(UnicodeWidthStr::width("hello"), 5);
        assert_eq!("hello".width_cjk(), 5);

        // NUL and other control characters contribute no columns.
        assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
        assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);

        assert_eq!(UnicodeWidthStr::width(""), 0);
        assert_eq!("".width_cjk(), 0);

        // Subscript digits are Ambiguous: 1 column by default, 2 in CJK.
        assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
        assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
    }

    /// Checks single-character widths, including the `None` result for
    /// control characters.
    #[test]
    fn test_char() {
        use super::UnicodeWidthChar;
        use core::option::Option::{Some, None};

        // Fullwidth Latin small letter h (U+FF48), written as an escape so it
        // cannot be confused with ASCII 'h'.
        assert_eq!(UnicodeWidthChar::width('\u{ff48}'), Some(2));
        assert_eq!('\u{ff48}'.width_cjk(), Some(2));

        // ASCII 'h' is narrow in both interpretations.
        assert_eq!(UnicodeWidthChar::width('h'), Some(1));
        assert_eq!('h'.width_cjk(), Some(1));

        // NUL has a width of zero; other control characters have no width.
        assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
        assert_eq!('\x00'.width_cjk(), Some(0));
        assert_eq!(UnicodeWidthChar::width('\x01'), None);
        assert_eq!('\x01'.width_cjk(), None);

        // Subscript one is Ambiguous: 1 column by default, 2 in CJK.
        assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
        assert_eq!('\u{2081}'.width_cjk(), Some(2));
    }
}
|
0 commit comments