文字列処理 - プログラミング日記

基本的な文字列処理

文字列は文字の配列(char[])なので、配列に対する操作がそのまま使える。

import tango.io.Stdout;

// Demonstrates basic string handling with the built-in array operations
// (a string is declared here as char[]).
// The two statements that cannot succeed -- indexing with s[$] and resizing
// a static array -- are kept as commented-out illustrations so the example
// compiles and runs through to the end.
void main()
{
    char[] s = "hello, world";
    // Properties
    Stdout (s.length).newline;          //=> 12 (length)
    Stdout (s.dup).newline;             //=> hello, world (copy)
    Stdout (s.dup.sort).newline;        //=>  ,dehllloorw (in-place sort, applied to the copy)
    Stdout (s.dup.reverse).newline;     //=> dlrow ,olleh (in-place reverse, applied to the copy)
    // Indexing
    Stdout (s[0]).newline;              //=> h
    Stdout (s[$-1]).newline;            //=> d ($ stands for the length of the string)
    // Stdout (s[$]).newline;           // would throw: Array index out of bounds
    // Slicing (no copy is made)
    Stdout (s[0..5]).newline;           //=> hello
    Stdout (s[0..$]).newline;           //=> hello, world
    // Concatenation
    Stdout (s[0..7] ~ "WORLD").newline; //=> hello, WORLD
    char[] s2 = "new ";
    s2 ~= s;
    Stdout (s2).newline;                //=> new hello, world
    // Element-wise copy into a slice
    char[5] s3;
    s3[0..5] = s[0..5];
    Stdout (s3).newline;                //=> hello
    // Element-wise fill with a single value
    s3[] = 'H';
    Stdout (s3).newline;                //=> HHHHH
    // Initialization
    char[5] s4;                         // static array (5 contiguous chars allocated on the stack)
    char[]  s5 = new char[5];           // dynamic array (5 contiguous chars allocated on the heap)
    // Resizing
    // s4.length = 10;                  // compile error: constant s4.length is not an lvalue
    s5.length = 10;                     // OK
}

tango.text.Utilによる文字列処理

基本的にコピーでなくスライスが返されることに注意。

import tango.io.Stdout;
import tango.text.Util;

// Tour of the tango.text.Util string helpers.
// The "//=>" comments show the output recorded by the original author;
// pointer values will differ from run to run.
void main()
{
    char[] s = "hello, world";

    // delimit, split, splitLines
    auto toks = delimit("aaa$bbb ccc_ddd", "$ _");  // several delimiter characters may be given
    Stdout (toks).newline;                          //=> [aaa, bbb, ccc, ddd]
    auto toks2 = split("aaa, bbb, ccc, ddd", ", "); // the delimiter is one whole string
    Stdout (toks2).newline;                         //=> [aaa, bbb, ccc, ddd]
    auto toks3 = splitLines("aaa\nbbb\nccc\nddd");  // the delimiter is the newline character
    Stdout (toks3).newline;                         //=> [aaa, bbb, ccc, ddd]

    // delimiters, patterns, lines, quotes
    auto toks4 = delimiters("aaa$bbb ccc_ddd", "$ _");  // several delimiter characters (returns an iterator)
    foreach(tok; toks4) { Stdout (tok)(","); }          //=> aaa,bbb,ccc,ddd,
    Stdout ("").newline;
    auto toks5 = quotes("aaa$'bbb ccc' ddd", "$ _");    // several delimiter characters (returns an iterator)
    foreach(tok; toks5) { Stdout (tok)(","); }          //=> aaa,'bbb ccc',ddd,
    Stdout ("").newline;
    auto toks6 = patterns("aaa bbb ccc ddd", " ");      // the delimiter is one whole string (returns an iterator)
    foreach(tok; toks6) { Stdout (tok)(","); }          //=> aaa,bbb,ccc,ddd,
    Stdout ("").newline;
    auto toks7 = lines("aaa\nbbb\nccc\nddd");           // the delimiter is the newline character (returns an iterator)
    foreach(tok; toks7) { Stdout (tok)(","); }          //=> aaa,bbb,ccc,ddd,
    Stdout ("").newline;

    // join
    char[][] toks8 = ["hello", " world"];
    //// When buff1 is too small:
    //// buff1 is not used; memory is allocated on the heap and a slice of it is returned as result
    char[12] buff1;
    auto result = join(toks8, ",", buff1);
    Stdout ("'" ~ buff1 ~ "'", buff1.ptr).newline;      //=> '', 12fe0c
    Stdout ("'" ~ result ~ "'", result.ptr).newline;    //=> 'hello, world', 935f80
    //// When buff2 is large enough:
    //// buff2's memory is used and a slice of it is returned as result2
    char[13] buff2;
    auto result2 = join(toks8, ",", buff2);
    Stdout ("'" ~ buff2 ~ "'", buff2.ptr).newline;      //=> 'hello, world,', 12fe20
    Stdout ("'" ~ result2 ~ "'", result2.ptr).newline;  //=> 'hello, world', 12fe20

    // replace, substitute
    Stdout (replace(s.dup, 'o', '_')).newline;                   //=> hell_, w_rld
    Stdout (replace(s.dup, 'o', cast(char)null)).newline;        //=> hell , w rld
    Stdout (substitute(s.dup, "ll", "_")).newline;               //=> he_o, world
    Stdout (substitute(s.dup, "ll", cast(char[])null)).newline;  //=> heo, world

    // trim, strip, stripr
    Stdout ("'" ~ trim(" hello, world  \n") ~ "'").newline;       //=> 'hello, world'
    Stdout ("'" ~ strip(" hello, world  ", ' ') ~ "'").newline;   //=> 'hello, world'
    Stdout ("'" ~ stripr(" hello, world  ", ' ') ~ "'").newline;  //=> ' hello, world'

    // locate, locatePrior, locatePattern, locatePatternPrior
    Stdout (locate(s, 'l')).newline;               //=> 2
    Stdout (locate(s, 'l', 4)).newline;            //=> 10 (search start position given)
    Stdout (locate(s, 'a')).newline;               //=> 12 (character not present; the length is returned on failure)
    Stdout (locatePrior(s, 'l')).newline;          //=> 10 (search from the end)
    Stdout (locatePattern(s, "ll")).newline;       //=> 2  (search for a string)
    Stdout (locatePatternPrior(s, "wo")).newline;  //=> 7  (search for a string from the end)

    // matching, indexOf, mismatch
    Stdout (matching(cast(char*)s, cast(char*)"hello", 5)).newline;  //=> true
    Stdout (indexOf(cast(char*)s, 'l', 5)).newline;                  //=> 2
    Stdout (mismatch(cast(char*)s, cast(char*)"hellx", 5)).newline;  //=> 4
}

文字列と数値の変換

import tango.io.Stdout;
import Integer = tango.text.convert.Integer;
import Float = tango.text.convert.Float;

// Converts between strings and numbers with tango.text.convert.Integer / Float.
// The "//=>" comments show the output recorded by the original author.
void main()
{
    // string => integer
    Stdout (Integer.parse("15")).newline;  //=> 15

    // string => floating point
    Stdout (Float.parse("3.14159")).newline;  //=> 3.14

    // integer => string
    char[10] buf;  // caller-supplied buffer passed to the format calls below
    auto decimalText = Integer.format(buf, 15);
    Stdout (decimalText).newline;  //=> 15
    auto hexText = Integer.format(new char[10], 15, "x#");  // convert to hexadecimal
    Stdout (hexText).newline;  //=> 0xf

    // floating point => string (same buffer passed again)
    auto floatText = Float.format(buf, 3.14);
    Stdout (floatText).newline;  //=> 3.14
}