/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.ko;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.ko.KoreanTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * JUnit 3 tests around {@link KoreanTokenizer} plus two small exploratory checks
 * (a parenthesis-extraction regex and a list-truncation helper).
 */
public class KoreanNTokenizerTest
extends TestCase {
    /** Captures the text found between one or more '(' and one or more ')'. */
    private static final Pattern PARENTHESIZED = Pattern.compile("[\\(]+([^\\)]+)[\\)]+");

    /** Sample Korean sentences; not referenced by any test in this class. */
    private String[] sources = new String[]{"\"\ub3d9\uc791\ud558\ub294 \uac00\uc7a5 \ub2e8\uc21c\ud55c \uc628\ub77c\uc778 \ub370\uc774\ud130\ubca0\uc774\uc2a4\"[1]\ub77c\uace0", "\uc704\ud0a4\uc704\ud0a4\uc6f9(WikiWikiWeb)\uc744 \uccab \uc704\ud0a4 \uc18c\ud504\ud2b8\uc6e8\uc5b4\uc778 \uc704\ud0a4\uc704\ud0a4\uc6f9(WikiWikiWeb)\uc744 \ub9cc\ub4e0 \uc6cc\ub4dc \ucee4\ub2dd\uc5c4\uc740 \uc704\ud0a4\ub97c \"\ub3d9\uc791\ud558\ub294 \uac00\uc7a5 \ub2e8\uc21c\ud55c \uc628\ub77c\uc778 \ub370\uc774\ud130\ubca0\uc774\uc2a4\"[1]\ub77c\uace0 \uc124\uba85\ud588\ub2e4.", "\uc704\ud0a4 \uc6f9\uc0ac\uc774\ud2b8\uc758 \ud55c \ubb38\uc11c\ub294 \"\uc704\ud0a4 \ubb38\uc11c\"\ub77c \ubd80\ub974\uba70, \ud558\uc774\ud37c\ub9c1\ud06c\ub85c \uc11c\ub85c \uc5f0\uacb0\ub41c \uc804\uccb4 \ubb38\uc11c\ub97c \"\uc704\ud0a4\"\ub77c \ud55c\ub2e4.", "(\uc608\ub97c \ub4e4\uc5b4 \"wiki\"\ub77c\ub294 \ubb38\uc11c\ub97c \"WiKi\"\ub85c \ud45c\uae30\ud55c\ub2e4\uac70\ub098 \ud55c\ub2e4.)"};

    /**
     * Splits a sample sentence on runs of spaces and prints every parenthesized
     * substring found in each fragment. Exploratory only: nothing is asserted.
     *
     * @throws Exception declared for parity with the other tests; nothing here is expected to throw
     */
    public void testKoreanPattern() throws Exception {
        String[] texts = new String[]{"\ub098\ub294 \uc790\ub791\uc2a4\ub7ec\uc6b4 \ud0dc\uadf9\uae30(\ud0dc\uadf9\uae30) \uc55e\uc5d0"};
        // An earlier, broader bracket/quote pattern was compiled here and immediately
        // overwritten; only the parenthesis pattern was ever applied, so only it is kept.
        for (String text : texts) {
            for (String fragment : text.split("[ ]+")) {
                Matcher m = PARENTHESIZED.matcher(fragment);
                while (m.find()) {
                    System.out.println(m.group(1));
                }
            }
        }
    }

    /**
     * Reads sample lines of the form {@code input==>expected}, tokenizes {@code input}
     * with {@link KoreanTokenizer} and asserts that the terms joined by {@code "/"}
     * equal {@code expected}. Lines that do not split into exactly two parts are skipped.
     *
     * @throws Exception if the sample file cannot be read or the tokenizer fails
     */
    public void testKoreanNTokenizer() throws Exception {
        List<String> lines;
        InputStream in = new FileInputStream(new File("arirang.lucene-analyzer-4.6/resources/tokenizer/tokensample.txt"));
        try {
            // NOTE(review): decodes with the platform default charset; confirm the sample
            // file's encoding and pass it explicitly if it is fixed (e.g. UTF-8).
            lines = IOUtils.readLines(in);
        } finally {
            // Previously the stream was never closed.
            in.close();
        }
        for (String line : lines) {
            System.out.println(line);
            // Split on the literal "==>" marker. StringUtils.split(line, "==>") treats the
            // argument as a set of separator characters, so any sample containing '=' or
            // '>' produced more than two parts and was silently skipped.
            String[] sample = StringUtils.splitByWholeSeparator(line, "==>");
            if (sample.length != 2) {
                continue;
            }
            assertEquals(sample[1], tokenize(sample[0]));
        }
    }

    /**
     * Runs {@link KoreanTokenizer} over {@code text} and joins the emitted terms with "/".
     *
     * @param text the input to tokenize
     * @return the terms in emission order, separated by "/"; empty if no term was emitted
     * @throws Exception if the tokenizer fails
     */
    private String tokenize(String text) throws Exception {
        KoreanTokenizer tokenizer = new KoreanTokenizer();
        try {
            // The reader used to be created but never handed to the tokenizer, so the
            // sample text was never actually consumed.
            // Assumes KoreanTokenizer extends Lucene's Tokenizer (setReader) -- TODO confirm.
            tokenizer.setReader(new StringReader(text));
            CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
            tokenizer.reset();
            StringBuilder joined = new StringBuilder();
            while (tokenizer.incrementToken()) {
                if (joined.length() > 0) {
                    joined.append("/");
                }
                joined.append(termAtt.toString());
            }
            tokenizer.end();
            return joined.toString();
        } finally {
            tokenizer.close();
        }
    }

    /**
     * Exercises {@link #removeLast(List, int)}: fills a list, truncates it at index 50,
     * refills and truncates again, checking size and surviving elements each time.
     *
     * @throws Exception declared for parity with the other tests; nothing here is expected to throw
     */
    public void testUtils() throws Exception {
        List<Integer> list = new ArrayList<Integer>();
        for (int i = 0; i < 100; ++i) {
            list.add(i);
        }
        this.removeLast(list, 50);
        System.out.println(list.size());
        assertEquals(50, list.size());
        // The head of the list must survive; only the tail is dropped.
        assertEquals(Integer.valueOf(0), list.get(0));
        assertEquals(Integer.valueOf(49), list.get(49));

        for (int i = 0; i < 100; ++i) {
            list.add(i);
        }
        this.removeLast(list, 50);
        System.out.println(list.size());
        assertEquals(50, list.size());
        assertEquals(Integer.valueOf(0), list.get(0));
        assertEquals(Integer.valueOf(49), list.get(49));
    }

    /**
     * Removes every element at index {@code start} or later, keeping the first
     * {@code start} elements. Does nothing when {@code start} is at or beyond the end.
     *
     * <p>The previous implementation counted the tail elements and then called
     * {@code list.remove(0)} that many times, which shrank the list to the right size
     * but discarded the head instead of the tail.
     *
     * @param list  the list to truncate in place
     * @param start index of the first element to remove
     * @throws IndexOutOfBoundsException if {@code start} is negative
     */
    private void removeLast(List<Integer> list, int start) {
        if (start >= list.size()) {
            return;
        }
        // Clearing the sub-list view removes exactly the tail range from the backing list.
        list.subList(start, list.size()).clear();
    }
}

