一道Google的面试题 - Simplified Regular Expression Parser_JAVA_编程开发_程序员俱乐部

中国优秀的程序员网站程序员频道CXYCLUB技术地图
热搜:
更多>>
 
您所在的位置: 程序员俱乐部 > 编程开发 > JAVA > 一道Google的面试题 - Simplified Regular Expression Parser

一道Google的面试题 - Simplified Regular Expression Parser

 2012/8/21 11:13:38  daniel.wuz  程序员俱乐部  我要评论(0)
  • 摘要:一、题目如下:--------------------------WriteaparserforasimplifiedregularexpressionOnanalphabetset[a-z],asimplifiedregularexpressionismuchsimplerthanthenormalregularexpression.Ithasonlytwometacharacters:'.'and'*'.'.'--exactonearbitrarycharactermatch.'*'-
  • 标签:面试 expression Google 面试题
一、题目如下:
--------------------------
Write a parser for a simplified regular expression
On an alphabet set [a-z], a simplified regular expression is much simpler than the normal regular expression.

It has only two meta characters: '.' and '*'.

'.' -- exact one arbitrary character match.

'*' -- zero or more arbitrary character match.
--------------------------

具个例子表达式 ab.*d 能匹配 'abcd', 'abccd', 'abad'。 不能匹配'abc', ' '(空字符串), 'abd'。

二、解法:

解析给定的表达式
生成对应的 NFA(Nondeterministic Finite Automation)
NFA转化为DFA(Deterministic Finite Automation)
利用DFA判断输入字符串

不懂正则表达式? 参考http://deerchao.net/tutorials/regex/regex.htm
不懂NFA? 参考http://planning.cs.uiuc.edu/node558.html
不懂DFA?参考http://lambda.uta.edu/cse5317/notes/node8.html

三、相关代码:
FiniteAutomata.java 和State.java,随手写写,请选择性参考。

FiniteAutomata.java
package parser;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;

public class FiniteAutomata {

	private State start;

	private final static char[] inputs;

	static {
		inputs = new char[26];
		for (char i = 0; i < 26; i++) {
			inputs[i] = (char) ('a' + i);
		}
	}

	private final char episilon = 0;

	private char stateId;

	public void compile(String pattern) {
		this.stateId = 'A';
		State NFA = toNFA(pattern);
		this.start = convertToDFA(NFA);
	}

	private State convertToDFA(State NFA) {
		Map<Set<State>, State> cache = new HashMap<Set<State>, State>();
		Queue<Set<State>> queue = new LinkedList<Set<State>>();

		// start state of DFA
		Set<State> start = episilon(NFA);
		State starter = nextState();
		cache.put(start, starter);
		queue.offer(start);
		while (!queue.isEmpty()) {
			Set<State> currentEpisilon = queue.poll();
			State current = cache.get(currentEpisilon);
			// create new State
			for (char input : inputs) {
				Set<State> nextEpisilon = move(currentEpisilon, input);
				if (nextEpisilon.isEmpty()) {
					continue;
				}
				State next;
				if (!cache.containsKey(nextEpisilon)) {
					next = nextState();
					cache.put(nextEpisilon, next);
					queue.offer(nextEpisilon);
				} else {
					next = cache.get(nextEpisilon);
				}
				boolean isAccept = containsAcceptState(nextEpisilon);
				next.setAccept(isAccept);
				current.setTransition(input, next);
			}
		}
		return starter;
	}

	private Set<State> move(Set<State> currentEpisilon, char input) {
		Set<State> result = new HashSet<State>();
		for (State s : currentEpisilon) {
			State next = s.transit(input);
			if (next != null) {
				result.add(next);
			}
		}
		return episilon(result);
	}

	private boolean containsAcceptState(Set<State> nextEpisilon) {
		for (State state : nextEpisilon) {
			if (state.isAccept()) {
				return true;
			}
		}
		return false;
	}

	private Set<State> episilon(State s) {
		Set<State> cache = new HashSet<State>();
		cache.add(s);
		return episilon(s, cache);
	}

	private Set<State> episilon(Set<State> states) {
		Set<State> cache = new HashSet<State>();
		for (State s : states) {
			cache.add(s);
			cache.addAll(episilon(s, cache));
		}
		return cache;
	}

	private Set<State> episilon(State s, Set<State> cache) {
		State next = s.transit(episilon);
		if (next != null && !cache.contains(next)) {
			cache.add(next);
			cache.addAll(episilon(s, cache));
		}
		return cache;
	}

	private State toNFA(String pattern) {
		char[] expr = pattern.toCharArray();
		State currentState = nextState();
		State starter = currentState;
		for (char c : expr) {
			if (c == '.') {
				State nextState = nextState();
				// add each transition for all inputs
				for (char i : inputs) {
					currentState.setTransition(i, nextState);
				}
				currentState = nextState;
			} else if (c == '*') {
				State nextState = nextState();
				// add each transition for all inputs
				for (char i : inputs) {
					currentState.setTransition(i, nextState);
				}
				currentState.setTransition(episilon, nextState);
				nextState.setTransition(episilon, currentState);
				currentState = nextState;
			} else if (c >= 'a' && c <= 'z') {
				State nextState = nextState();
				currentState.setTransition(c, nextState);
				currentState = nextState;
			} else {
				throw new RuntimeException("Unrecognized expression");
			}
		}
		currentState.setAccept(true);
		return starter;
	}

	private State nextState() {
		return new State(stateId++);
	}

	public void constructDFA(Map<State, Map<Character, State>> mapping) {
		Iterator<Map.Entry<State, Map<Character, State>>> it = mapping
				.entrySet().iterator();
		while (it.hasNext()) {
			Entry<State, Map<Character, State>> entry = it.next();
			State state = entry.getKey();
			Map<Character, State> transition = entry.getValue();
			state.setTransition(transition);
		}
	}

	public boolean match(String c) {
		char[] candidate = c.toCharArray();
		for (char d : candidate) {
			start = start.transit(d);
			if (start == null) {
				return false;
			}
		}
		return start.isAccept();
	}

	public static String[] patterns = { "abc", "*", "*abc", "*abc", "a*bc",
			"a*bc", "a*", "a*", "a*", "a*", "*abc*", "*****", "...", ".*",
			".bc*", ".b*c*a", "*", "abc", "*a", "a", ".a*c", "a.*b", "..", "",
			"" };

	public static String[] candidates = { "abc", "abc", "abc", "aaabbbabc",
			"aaabbbabc", "abc", "abc", "a", "aa", "abcdef", "abc", "abc",
			"abc", "abc", "abc", "abca", "", "abcd", "abcd", "", "abc", "abc",
			"abc", "", "abc" };

	public static void main(String[] args) {
		FiniteAutomata fa = new FiniteAutomata();
		for (int i = 0; i < patterns.length; i++) {
			fa.compile(patterns[i]);
			System.out.println(fa.match(candidates[i]));
		}
	}

}


State.java
package parser;

import java.util.HashMap;
import java.util.Map;

public class State {

	private boolean accept = false;

	private char id;

	private Map<Character, State> mapping = new HashMap<Character, State>();

	public State(char c) {
		this.id = c;
	}

	public State(char c, boolean isAccept) {
		this.id = c;
		this.accept = isAccept;
	}

	public State transit(char input) {
		return mapping.get(input);
	}

	public void setTransition(char input, State next) {
		this.mapping.put(input, next);
	}

	public void setTransition(Map<Character, State> mapping) {
		this.mapping = mapping;
	}

	public boolean isAccept() {
		return this.accept;
	}

	public void setAccept(boolean b) {
		this.accept = b;
	}

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result + id;
		return result;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj)
			return true;
		if (obj == null)
			return false;
		if (getClass() != obj.getClass())
			return false;
		State other = (State) obj;
		if (id != other.id)
			return false;
		return true;
	}

	@Override
	public String toString() {
		return "State [id=" + id + "]";
	}
	
}
发表评论
用户名: 匿名