2021年12月20日 星期一

(Simple) Chinese Hyphenation for Unity - 斷詞

本篇介紹自然語言處理中最基礎的一環:斷詞。斷詞方法有很多,這裡只以「長詞優先」為例。長詞優先法主要基於中文「字→詞→句」的構成特性,因此需要由詞的部分開始下手。

首先需要資料庫存放詞語數據,再由演算法比對的方式來呈現。得到結果後就可以做出各式各樣的功能 (如: 關鍵字搜尋)。

GitHub 專案

 

 

 

 

 

 

 

 

 

 

 

 

 

Hyphenation 腳本(斷詞類別)

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;

/// <summary>
/// Base type for word-segmentation strategies. It stores the input text list and a
/// database title, and declares the three operations a concrete strategy must supply.
/// </summary>
public abstract class Hyphenation
{
    /// <summary>Input strings the derived class operates on.</summary>
    protected List<string> ward;

    /// <summary>
    /// Identifier handed to the database helper by derived classes
    /// (presumably the dictionary table name; confirm against MySQLFunction).
    /// </summary>
    protected string dataBaseTitlle;

    /// <summary>
    /// Stores the input list and database title for use by derived classes.
    /// The constructor is protected because an abstract type can only be
    /// constructed through a subclass.
    /// </summary>
    /// <param name="ward">Input strings; the reference is stored, not copied.</param>
    /// <param name="dataBaseTitlle">Identifier passed on to database queries.</param>
    protected Hyphenation(List<string> ward, string dataBaseTitlle)
    {
        this.ward = ward;
        this.dataBaseTitlle = dataBaseTitlle;
    }

    /// <summary>Reverse-direction operation over the stored input; returns the resulting strings.</summary>
    /// <param name="mySQLFunction">Database helper available to the implementation.</param>
    public abstract List<string> reverse(MySQLFunction mySQLFunction);

    /// <summary>Forward-direction operation over the stored input; returns the resulting strings.</summary>
    /// <param name="mySQLFunction">Database helper available to the implementation.</param>
    public abstract List<string> forward(MySQLFunction mySQLFunction);

    /// <summary>
    /// Produces one string per stored input entry
    /// (the name suggests a part-of-speech lookup; confirm against the implementation).
    /// </summary>
    /// <param name="mySQLFunction">Database helper available to the implementation.</param>
    public abstract List<string> speech(MySQLFunction mySQLFunction);
}

HyphenationSetting 腳本

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Longest-match-first word segmenter. A candidate counts as a word when
/// <c>mySQLFunction.inquire(dataBaseTitlle, 0, candidate)</c> returns the candidate itself.
/// </summary>
/// <remarks>
/// The methods read <c>ward</c> without modifying it, so the same instance (and the
/// caller's list) can be reused for several calls.
/// </remarks>
public class HyphenationSetting : Hyphenation
{
    public HyphenationSetting(List<string> ward, string dataBaseTitlle) : base(ward, dataBaseTitlle) { }

    // Forward
    /// <summary>
    /// Segments every entry of <c>ward</c> from left to right, always taking the
    /// longest leading substring that is a dictionary word. When no multi-character
    /// prefix matches, the first character is emitted on its own.
    /// </summary>
    /// <param name="mySQLFunction">Database helper used for the dictionary lookups.</param>
    /// <returns>The segments in reading order; empty when there is no input.</returns>
    public override List<string> forward(MySQLFunction mySQLFunction)
    {
        List<string> list = new List<string>();
        if (ward == null)
        {
            return list;
        }

        foreach (string text in ward)
        {
            // Work on a local copy so the stored input is left untouched.
            string remaining = text;

            while (!string.IsNullOrEmpty(remaining))
            {
                // Shrink the prefix until it is a known word. A single character is
                // always accepted, so it needs no query and the loop always advances.
                int length = remaining.Length;
                while (length > 1 && !IsDictionaryWord(mySQLFunction, remaining.Substring(0, length)))
                {
                    length--;
                }

                list.Add(remaining.Substring(0, length));
                remaining = remaining.Substring(length);
            }
        }

        return list;
    }

    // Reverse
    /// <summary>
    /// Segments every entry of <c>ward</c> from right to left, always taking the
    /// longest trailing substring that is a dictionary word. When no multi-character
    /// suffix matches, the last character is emitted on its own.
    /// </summary>
    /// <param name="mySQLFunction">Database helper used for the dictionary lookups.</param>
    /// <returns>
    /// The segments in the order they were found, i.e. the last word of each entry
    /// comes first; empty when there is no input.
    /// </returns>
    public override List<string> reverse(MySQLFunction mySQLFunction)
    {
        List<string> list = new List<string>();
        if (ward == null)
        {
            return list;
        }

        foreach (string text in ward)
        {
            // Work on a local copy so the stored input is left untouched.
            string remaining = text;

            while (!string.IsNullOrEmpty(remaining))
            {
                // Move the suffix start rightwards until the suffix is a known word.
                // The final single character is always accepted without a query.
                int start = 0;
                int lastIndex = remaining.Length - 1;
                while (start < lastIndex && !IsDictionaryWord(mySQLFunction, remaining.Substring(start)))
                {
                    start++;
                }

                list.Add(remaining.Substring(start));
                remaining = remaining.Substring(0, start);
            }
        }

        return list;
    }

    // Part of speech
    /// <summary>
    /// Looks up every entry of <c>ward</c> with
    /// <c>mySQLFunction.inquire(dataBaseTitlle, 1, entry)</c> and collects the results.
    /// Entries whose lookup yields no text are reported as "?".
    /// </summary>
    /// <param name="mySQLFunction">Database helper used for the lookups.</param>
    /// <returns>One string per entry of <c>ward</c>, in the same order.</returns>
    public override List<string> speech(MySQLFunction mySQLFunction)
    {
        List<string> list = new List<string>();
        if (ward == null)
        {
            return list;
        }

        foreach (string str in ward)
        {
            // Query once and reuse the answer for both the check and the result.
            string tag = mySQLFunction.inquire(dataBaseTitlle, 1, str);
            list.Add(string.IsNullOrEmpty(tag) ? "?" : tag);
        }

        return list;
    }

    // True when the dictionary lookup echoes the candidate back, i.e. it is a known word.
    private bool IsDictionaryWord(MySQLFunction mySQLFunction, string candidate)
    {
        return candidate == mySQLFunction.inquire(dataBaseTitlle, 0, candidate);
    }
}

設計介面:

  • System - (Unity UI Text)
  • UserMessage - (Unity UI Text)
  • InputMessage - (Unity UI InputField)
  • Enter - (Unity UI Button)
  • Exit - (Unity UI Button)
  • Result - (Unity UI Text)
  • ResultMessage - (Unity UI Image)
    • Message - (Unity UI Text)
  • Horizontal - (Unity UI Scrollbar) 

以下3張圖片:

  • ResultMessage - (Unity UI Image) 組件設定
    • Message - (Unity UI Text) 組件設定
  • Horizontal - (Unity UI Scrollbar) 組件設定

image

image

image

沒有留言:

張貼留言