D言語を使ってdiffを書いてみる

ずっと前から存在は知ってたけど、使うのは初めて。
とりあえずdiffプログラムを書いてみた:

private import std.stream;
private import std.string;
private import std.file;

/**
 * A named list of text lines together with a cursor, i.e. the index of the
 * line that is currently being looked at.
 *
 * The cursor ranges from 0 up to and including getLength(); the value
 * getLength() means "past the last line" (see isEnd()).
 */
private class Buffer {
  private char[] name;
  private char[][] lines;
  private int current;

  /**
   * Creates a buffer over the given lines with the cursor on the first line.
   *
   * Params:
   *   name  = label for this buffer (readFile passes the file name)
   *   lines = the lines; the array is stored as-is, not copied
   */
  public this(char[] name, char[][] lines) {
    this.name = name;
    this.lines = lines;
    this.current = 0;
  }

  /** Returns the name given at construction. */
  public char[] getName() {
    return this.name;
  }

  /** Returns the line under the cursor; the cursor must not be at the end. */
  public char[] getCurrentLine() {
    return this.getLineAt(this.getCurrent());
  }

  /** Returns the line at the given zero-based index. */
  public char[] getLineAt(int index) {
    return this.lines[index];
  }

  /** Returns the underlying line array (not a copy). */
  public char[][] getLines() {
    return this.lines;
  }

  /** Returns the number of lines. */
  public int getLength() {
    return this.lines.length;
  }

  /** Returns true once the cursor has moved past the last line. */
  public bool isEnd() {
    return this.getCurrent() >= this.getLength();
  }

  /** Returns the zero-based cursor position. */
  public int getCurrent() {
    return this.current;
  }

  /** Moves the cursor to the given zero-based position. */
  public void setCurrent(int current) {
    // A void function has nothing to return, so this is a plain assignment.
    this.current = current;
  }

  /** Advances the cursor by one line; does nothing once it is at the end. */
  public void next() {
    if (this.current < this.getLength()) {
      this.current += 1;
    }
  }

  /**
   * Reads a whole file line by line into a new Buffer named after the file.
   *
   * Params:
   *   fileName = path of the file to read
   * Returns: a Buffer holding every line returned by File.readLine, in order
   */
  public static Buffer readFile(char[] fileName) {
    File file = new File(fileName);
    char[][] lines;
    try {
      while (! file.eof()) {
        char[] line = file.readLine();
        // The dynamic array grows on append; no manual capacity management.
        lines ~= line;
      }
    } finally {
      // Close the file even when reading fails part-way through.
      file.close();
    }
    return new Buffer(fileName, lines);
  }
}

/**
 * Looks ahead in its own Buffer for a line that equals the current line of a
 * partner searcher's Buffer. Two searchers are paired with setOther() and
 * probe alternately, one line further on each turn.
 */
private class SameLineSearcher {
  private Buffer buffer;
  // Look-ahead index into buffer; restarted from the cursor by init().
  private int searchingCurrent;
  private SameLineSearcher other;
  private LineComparator comparator;

  /**
   * Params:
   *   buffer     = the buffer this searcher looks ahead in
   *   comparator = decides whether two lines count as the same
   */
  public this(Buffer buffer, LineComparator comparator) {
    this.buffer = buffer;
    this.comparator = comparator;
  }

  /** Sets the partner searcher; must be called before find(). */
  public void setOther(SameLineSearcher other) {
    this.other = other;
  }

  /**
   * Resynchronizes the two buffers after a mismatch.
   *
   * If a later line in either buffer equals the current line of the opposite
   * buffer, the cursor of the buffer containing that later line is moved onto
   * it and the opposite cursor stays put. Otherwise both cursors advance by
   * one line.
   */
  public void find() {
    this.init();
    this.other.init();
    if (! this.searchCurrent()) {
      this.next();
      this.other.next();
    }
  }

  private void next() {
    this.buffer.next();
  }

  /** Restarts the look-ahead from the buffer's cursor. */
  private void init() {
    this.searchingCurrent = buffer.getCurrent();
  }

  /** True when the look-ahead index has run past the last line. */
  private bool isLast() {
    return this.searchingCurrent >= this.buffer.getLength();
  }

  private char[] getSearchingLine() {
    return this.buffer.getLineAt(this.searchingCurrent);
  }
  private char[] getCurrentLine() {
    return this.buffer.getCurrentLine();
  }

  private bool isSameLine(char[] lineA, char[] lineB) {
    return this.comparator.isSame(lineA, lineB);
  }

  /**
   * Alternates between this searcher and its partner, starting with this one.
   * On its turn a searcher steps its look-ahead index forward by one and
   * compares that line with the partner's current line.
   *
   * Implemented as a loop rather than mutual recursion so that the stack
   * depth does not grow with the number of lines probed.
   *
   * Returns: true if a match was found (the matching searcher's buffer cursor
   *          has been moved onto it), false if both look-aheads ran off the
   *          end of their buffers.
   */
  private bool searchCurrent() {
    SameLineSearcher active = this;
    while (true) {
      active.searchingCurrent += 1;
      if (! active.isLast()) {
        if (active.isSameLine(active.getSearchingLine(), active.other.getCurrentLine())) {
          active.buffer.setCurrent(active.searchingCurrent);
          return true;
        }
      } else if (active.other.isLast()) {
        // Both sides are exhausted: nothing to resynchronize on.
        return false;
      }
      // Hand the turn to the partner.
      active = active.other;
    }
  }
}

/**
 * Callback interface that receives the result of a comparison.
 *
 * In this file Diff.run() calls begin() once, then foundSameLines() once per
 * run of consecutive matching lines, then end() once.
 */
public interface DiffFormatter {
  /**
   * Called before any comparison result is reported.
   *
   * Params:
   *   nameA  = name of the first input
   *   linesA = all lines of the first input
   *   nameB  = name of the second input
   *   linesB = all lines of the second input
   */
  public void begin(char[] nameA, char[][] linesA,
                    char[] nameB, char[][] linesB);
  /**
   * Reports one run of lines that match in both inputs.
   *
   * As called by Diff.run(), each value is a zero-based line index taken from
   * the buffer cursor: the Begin values are the first matching line and the
   * End values are the last matching line (inclusive).
   */
  public void foundSameLines(int bufABeginRegion, int bufAEndRegion,
                             int bufBBeginRegion, int bufBEndRegion);
  /** Called after the last foundSameLines() call. */
  public void end();
}

/**
 * Strategy that decides whether two lines count as equal for the comparison.
 */
public interface LineComparator {
  /** Returns true when lineA and lineB are to be treated as the same line. */
  public bool isSame(char[] lineA, char[] lineB);
}

/**
 * Compares two files line by line and reports every run of matching lines to
 * a DiffFormatter.
 */
public class Diff {
  private Buffer bufA;
  private Buffer bufB;
  private DiffFormatter processor;
  private LineComparator comparator;

  /**
   * Loads both files into memory.
   *
   * Params:
   *   fileNameA  = path of the first file
   *   fileNameB  = path of the second file
   *   comparator = decides whether two lines count as the same
   *   processor  = receives begin / foundSameLines / end callbacks from run()
   */
  public this(char[] fileNameA, char[] fileNameB, LineComparator comparator, DiffFormatter processor) {
    this.bufA = Buffer.readFile(fileNameA);
    this.bufB = Buffer.readFile(fileNameB);
    this.comparator = comparator;
    this.processor = processor;
  }

  /**
   * Walks both buffers in step. While the current lines match, the run of
   * matching lines is extended; on a mismatch the pending run (if any) is
   * reported and the searchers are asked to move the cursors.
   */
  public void run() {
    SameLineSearcher left = new SameLineSearcher(bufA, comparator);
    SameLineSearcher right = new SameLineSearcher(bufB, comparator);
    left.setOther(right);
    right.setOther(left);

    // Bounds of the run of matching lines currently being collected.
    int firstA = -1;
    int lastA = -1;
    int firstB = -1;
    int lastB = -1;
    bool matching = false;

    processor.begin(bufA.getName(), bufA.getLines(),
                    bufB.getName(), bufB.getLines());

    for (;;) {
      if (bufA.isEnd() || bufB.isEnd()) {
        break;
      }

      int posA = bufA.getCurrent();
      int posB = bufB.getCurrent();

      if (! isSameLine(bufA.getLineAt(posA), bufB.getLineAt(posB))) {
        // Mismatch: close the pending run, then let the searchers resync.
        if (matching) {
          matching = false;
          processor.foundSameLines(firstA, lastA, firstB, lastB);
        }
        left.find();
        continue;
      }

      if (! matching) {
        firstA = posA;
        firstB = posB;
        matching = true;
      }
      lastA = posA;
      lastB = posB;
      bufA.next();
      bufB.next();
    }

    // A run that reaches the end of either buffer is still pending here.
    if (matching) {
      processor.foundSameLines(firstA, lastA, firstB, lastB);
    }
    processor.end();
  }

  /** Delegates the equality decision to the configured comparator. */
  private bool isSameLine(char[] lineA, char[] lineB) {
    return comparator.isSame(lineA, lineB);
  }
}

/**
 * LineComparator that treats two lines as the same when the module-scope
 * cmp function reports no ordering difference between them.
 */
public class CmpLineComparator: LineComparator {
  /** Returns true when cmp(lineA, lineB) yields 0. */
  public bool isSame(char[] lineA, char[] lineB) {
    int ordering = .cmp(lineA, lineB);
    return ordering == 0;
  }
}

/**
 * DiffFormatter that prints the differing regions to stdout in the classic
 * "normal" diff layout: a header such as 3a4,5 / 2,3d1 / 4c4,6 followed by
 * the affected lines prefixed with "< " and "> ".
 */
public class SimpleOutput: DiffFormatter {
  private char[][] linesA;
  private char[][] linesB;
  // Index of the first line in each input that has not been accounted for yet.
  private int bufACursor;
  private int bufBCursor;

  /** Remembers both line arrays and resets the cursors; names are unused. */
  public void begin(char[] nameA, char[][] linesA,
                    char[] nameB, char[][] linesB) {
    this.linesA = linesA;
    this.linesB = linesB;
    this.bufACursor = 0;
    this.bufBCursor = 0;
  }

  /**
   * Prints the differing lines that lie between the previous matching run and
   * this one, then moves both cursors just past this run. The End arguments
   * are treated as inclusive indices.
   */
  public void foundSameLines(int bufABeginRegion, int bufAEndRegion,
                             int bufBBeginRegion, int bufBEndRegion) {
    this.printDiff(this.bufACursor, bufABeginRegion, this.bufBCursor, bufBBeginRegion);
    this.bufACursor = bufAEndRegion + 1;
    this.bufBCursor = bufBEndRegion + 1;
  }

  /** Prints whatever remains after the last matching run. */
  public void end() {
    this.printDiff(this.bufACursor, this.linesA.length, this.bufBCursor, this.linesB.length);
  }

  /**
   * Prints one hunk for the half-open, zero-based ranges [beginA, endA) of
   * the first input and [beginB, endB) of the second. Prints nothing when
   * both ranges are empty.
   */
  private void printDiff(int beginA, int endA, int beginB, int endB) {
    if (beginA >= endA && beginB >= endB) {
      return;
    }
    if (beginA == endA) {
      // Lines only in B: "<line of A after which they go>a<range in B>".
      // Formatted with writef: the stream's write(char[]) overload emits a
      // binary length prefix before the text, which would corrupt the header.
      stdout.writef("%d", beginA);
      stdout.writef("a");
      this.writeRange(beginB, endB);
    } else if (beginB == endB) {
      // Lines only in A: "<range in A>d<line of B after which they would go>".
      this.writeRange(beginA, endA);
      stdout.writef("d");
      stdout.writef("%d", beginB);
    } else {
      // Lines differ on both sides: "<range in A>c<range in B>".
      this.writeRange(beginA, endA);
      stdout.writef("c");
      this.writeRange(beginB, endB);
    }
    stdout.writeLine("");
    for (int i = beginA; i < endA; i++) {
      stdout.writeLine("< " ~ this.linesA[i]);
    }
    if (beginA < endA && beginB < endB) {
      stdout.writeLine("---");
    }
    for (int i = beginB; i < endB; i++) {
      stdout.writeLine("> " ~ this.linesB[i]);
    }
  }

  /**
   * Writes the zero-based half-open range [begin, end) as one-based line
   * numbers: "N" for a single line, "N,M" for several.
   */
  private void writeRange(int begin, int end) {
    stdout.writef("%d", begin + 1);
    if (begin + 1 < end) {
      stdout.writef(",%d", end);
    }
  }
}

/**
 * Entry point: diff file1 file2.
 *
 * Prints a usage line and returns 1 unless exactly two file arguments are
 * given. Reports every argument that does not exist and returns 1 if any is
 * missing. Otherwise runs the comparison with CmpLineComparator and
 * SimpleOutput and returns 0.
 */
int main(char[][] args) {
  if (args.length != 3) {
    stderr.writef("usage: %s file1 file2", args[0]);
    stderr.writeLine("");
    return 1;
  }

  // Check both paths before giving up so that every missing file is reported.
  int missing = 0;
  foreach (char[] path; args[1 .. 3]) {
    if (.exists(path)) {
      continue;
    }
    stderr.writef("%s: %s: No such file or directory", args[0], path);
    stderr.writeLine("");
    missing += 1;
  }
  if (missing > 0) {
    return 1;
  }

  Diff diff = new Diff(args[1], args[2], new CmpLineComparator(), new SimpleOutput());
  diff.run();

  return 0;
}

感想としては

  • ネイティブプログラムを書くにはいいかも

あと、in,out,invariantとかunittestとかも面白そう。

以下は、あれれとおもったとこ

  • Phobos(標準ライブラリ)では文字列処理がいまいち変
    • charbuf.length == 0だとnull扱いらしい(readLine読み込みではまった)
    • でもwcharbuf.length == 0はnullではない
    • シンプルな標準型変換機能がほしい(.toStringとかめんどい)
    • wcharが結構扱いにくい(.cmp(char[],char[])しかないし)
  • .isfile(filename)でファイルがないとErrorって...
    • なぜboolじゃないんだろ
  • 動的配列の扱い
    • buf.length = newsize というのも...
    • 自動リサイズと切り詰めメソッドがあれば...
  • キャッチされないErrorのcallstackトレースがほしい
  • return文なしのようなフローに関するチェックはしないっぽい

文字列についてはクラスライブラリで対応できそうなところではある。しかし、この辺はどのライブラリでも影響してくる部分なので、全てラップでもしない限り生のchar[]や配列は使わざるを得なくなるところでもある。

個人的には、ソースコードをテキストで書く以上、新しい言語では文字列はもっと特別視するべきと思っているので、文字列まわりが弱いのは少し残念なところだったり。