Optimized parsers for speed & memory usage

Switches to lazy parsing of diff chunks
This commit is contained in:
Eric Amodio
2017-06-10 00:16:46 -04:00
parent eeff31cf27
commit e6316400f0
16 changed files with 343 additions and 340 deletions

View File

@@ -1,16 +1,17 @@
'use strict';
import { GitAuthor, GitCommit, GitCommitLine } from './commit';
import { GitAuthor, GitCommitLine } from './commit';
import { GitBlameCommit } from './blameCommit';
export interface GitBlame {
repoPath: string;
authors: Map<string, GitAuthor>;
commits: Map<string, GitCommit>;
commits: Map<string, GitBlameCommit>;
lines: GitCommitLine[];
}
export interface GitBlameLine {
author: GitAuthor;
commit: GitCommit;
commit: GitBlameCommit;
line: GitCommitLine;
}
@@ -20,6 +21,6 @@ export interface GitBlameLines extends GitBlame {
export interface GitBlameCommitLines {
author: GitAuthor;
commit: GitCommit;
commit: GitBlameCommit;
lines: GitCommitLine[];
}

View File

@@ -0,0 +1,20 @@
'use strict';
import { GitCommit, GitCommitLine } from './commit';
/**
 * A commit produced from blame output: a `GitCommit` whose type is always
 * 'blame' and which carries the list of lines associated with it.
 */
export class GitBlameCommit extends GitCommit {

    /** Line records belonging to this commit. */
    lines: GitCommitLine[];

    constructor(
        repoPath: string,
        sha: string,
        fileName: string,
        author: string,
        date: Date,
        message: string,
        lines: GitCommitLine[],
        originalFileName?: string,
        previousSha?: string,
        previousFileName?: string
    ) {
        // The commit type is fixed; everything else is forwarded unchanged
        super('blame', repoPath, sha, fileName, author, date, message, originalFileName, previousSha, previousFileName);

        this.lines = lines;
    }
}

View File

@@ -21,7 +21,7 @@ export type GitCommitType = 'blame' | 'branch' | 'file' | 'stash';
export class GitCommit {
type: GitCommitType;
lines: GitCommitLine[];
// lines: GitCommitLine[];
originalFileName?: string;
previousSha?: string;
previousFileName?: string;
@@ -36,7 +36,7 @@ export class GitCommit {
public author: string,
public date: Date,
public message: string,
lines?: GitCommitLine[],
// lines?: GitCommitLine[],
originalFileName?: string,
previousSha?: string,
previousFileName?: string
@@ -44,7 +44,7 @@ export class GitCommit {
this.type = type;
this.fileName = this.fileName && this.fileName.replace(/, ?$/, '');
this.lines = lines || [];
// this.lines = lines || [];
this.originalFileName = originalFileName;
this.previousSha = previousSha;
this.previousFileName = previousFileName;

View File

@@ -1,20 +1,41 @@
'use strict';
import { GitDiffParser } from '../parsers/diffParser';
export interface GitDiffLine {
line: string;
state: 'added' | 'removed' | 'unchanged';
}
export interface GitDiffChunk {
current: (GitDiffLine | undefined)[];
currentStart: number;
currentEnd: number;
export class GitDiffChunk {
previous: (GitDiffLine | undefined)[];
previousStart: number;
previousEnd: number;
private _chunk: string | undefined;
private _current: (GitDiffLine | undefined)[] | undefined;
private _previous: (GitDiffLine | undefined)[] | undefined;
chunk?: string;
/** Start/end positions of this chunk on the current side. */
currentPosition: { start: number, end: number };
/** Start/end positions of this chunk on the previous side. */
previousPosition: { start: number, end: number };

/**
 * Stores the raw chunk text (it is only parsed when `current` or `previous`
 * is first read) together with the chunk's positions on both sides.
 */
constructor(
    chunk: string,
    currentPosition: { start: number, end: number },
    previousPosition: { start: number, end: number }
) {
    this._chunk = chunk;
    this.currentPosition = currentPosition;
    this.previousPosition = previousPosition;
}
/**
 * Lines of the current side of the chunk, as produced by
 * `GitDiffParser.parseChunk`. The raw chunk is parsed on first access of
 * either side and the result is reused afterwards.
 */
get current(): (GitDiffLine | undefined)[] {
    // No raw chunk left means the parsed arrays are already in place
    if (this._chunk === undefined) return this._current!;

    this.parseChunk();
    return this._current!;
}
/**
 * Lines of the previous side of the chunk, as produced by
 * `GitDiffParser.parseChunk`. The raw chunk is parsed on first access of
 * either side and the result is reused afterwards.
 */
get previous(): (GitDiffLine | undefined)[] {
    // No raw chunk left means the parsed arrays are already in place
    if (this._chunk === undefined) return this._previous!;

    this.parseChunk();
    return this._previous!;
}
/**
 * Parses the retained raw chunk into the `current` / `previous` line arrays,
 * then drops the raw text so the getters know parsing has been done.
 */
private parseChunk() {
    const [currentLines, previousLines] = GitDiffParser.parseChunk(this._chunk!);

    this._current = currentLines;
    this._previous = previousLines;
    this._chunk = undefined;
}
}
export interface GitDiff {

View File

@@ -1,6 +1,6 @@
'use strict';
import { Uri } from 'vscode';
import { GitCommit, GitCommitLine, GitCommitType } from './commit';
import { GitCommit, GitCommitType } from './commit';
import { GitStatusFileStatus, IGitStatusFile } from './status';
import * as path from 'path';
@@ -23,12 +23,11 @@ export class GitLogCommit extends GitCommit {
message: string,
status?: GitStatusFileStatus,
fileStatuses?: IGitStatusFile[],
lines?: GitCommitLine[],
originalFileName?: string,
previousSha?: string,
previousFileName?: string
) {
super(type, repoPath, sha, fileName, author, date, message, lines, originalFileName, previousSha, previousFileName);
super(type, repoPath, sha, fileName, author, date, message, originalFileName, previousSha, previousFileName);
this.fileNames = this.fileName;

View File

@@ -1,5 +1,6 @@
'use strict';
export * from './blame';
export * from './blameCommit';
export * from './branch';
export * from './commit';
export * from './diff';

View File

@@ -1,5 +1,4 @@
'use strict';
import { GitCommitLine } from './commit';
import { GitLogCommit } from './logCommit';
import { GitStatusFileStatus, IGitStatusFile } from './status';
@@ -14,12 +13,11 @@ export class GitStashCommit extends GitLogCommit {
message: string,
status?: GitStatusFileStatus,
fileStatuses?: IGitStatusFile[],
lines?: GitCommitLine[],
originalFileName?: string,
previousSha?: string,
previousFileName?: string
) {
super('stash', repoPath, sha, fileName, 'You', date, message, status, fileStatuses, lines, originalFileName, previousSha, previousFileName);
super('stash', repoPath, sha, fileName, 'You', date, message, status, fileStatuses, originalFileName, previousSha, previousFileName);
}
get shortSha() {

View File

@@ -1,5 +1,6 @@
'use strict';
import { Git, GitAuthor, GitBlame, GitCommit, GitCommitLine } from './../git';
import { Strings } from '../../system';
import { Git, GitAuthor, GitBlame, GitBlameCommit, GitCommitLine } from './../git';
import * as moment from 'moment';
import * as path from 'path';
@@ -11,15 +12,9 @@ interface BlameEntry {
lineCount: number;
author: string;
// authorEmail?: string;
authorDate?: string;
authorTimeZone?: string;
// committer?: string;
// committerEmail?: string;
// committerDate?: string;
// committerTimeZone?: string;
previousSha?: string;
previousFileName?: string;
@@ -30,18 +25,25 @@ interface BlameEntry {
export class GitBlameParser {
private static _parseEntries(data: string): BlameEntry[] | undefined {
static parse(data: string, repoPath: string | undefined, fileName: string): GitBlame | undefined {
if (!data) return undefined;
const lines = data.split('\n');
if (!lines.length) return undefined;
const authors: Map<string, GitAuthor> = new Map();
const commits: Map<string, GitBlameCommit> = new Map();
const lines: GitCommitLine[] = [];
const entries: BlameEntry[] = [];
let relativeFileName = repoPath && fileName;
let entry: BlameEntry | undefined = undefined;
let position = -1;
while (++position < lines.length) {
const lineParts = lines[position].split(' ');
let line: string;
let lineParts: string[];
let i = -1;
let first = true;
for (line of Strings.lines(data)) {
i++;
lineParts = line.split(' ');
if (lineParts.length < 2) continue;
if (entry === undefined) {
@@ -62,10 +64,6 @@ export class GitBlameParser {
: lineParts.slice(1).join(' ').trim();
break;
// case 'author-mail':
// entry.authorEmail = lineParts[1].trim();
// break;
case 'author-time':
entry.authorDate = lineParts[1];
break;
@@ -74,22 +72,6 @@ export class GitBlameParser {
entry.authorTimeZone = lineParts[1];
break;
// case 'committer':
// entry.committer = lineParts.slice(1).join(' ').trim();
// break;
// case 'committer-mail':
// entry.committerEmail = lineParts[1].trim();
// break;
// case 'committer-time':
// entry.committerDate = lineParts[1];
// break;
// case 'committer-tz':
// entry.committerTimeZone = lineParts[1];
// break;
case 'summary':
entry.summary = lineParts.slice(1).join(' ').trim();
break;
@@ -102,7 +84,15 @@ export class GitBlameParser {
case 'filename':
entry.fileName = lineParts.slice(1).join(' ');
entries.push(entry);
if (first && repoPath === undefined) {
// Try to get the repoPath from the most recent commit
repoPath = Git.normalizePath(fileName.replace(fileName.startsWith('/') ? `/${entry.fileName}` : entry.fileName!, ''));
relativeFileName = Git.normalizePath(path.relative(repoPath, fileName));
}
first = false;
GitBlameParser._parseEntry(entry, repoPath, relativeFileName, commits, authors, lines);
entry = undefined;
break;
@@ -111,71 +101,6 @@ export class GitBlameParser {
}
}
return entries;
}
static parse(data: string, repoPath: string | undefined, fileName: string): GitBlame | undefined {
const entries = this._parseEntries(data);
if (!entries) return undefined;
const authors: Map<string, GitAuthor> = new Map();
const commits: Map<string, GitCommit> = new Map();
const lines: GitCommitLine[] = [];
let relativeFileName = repoPath && fileName;
for (let i = 0, len = entries.length; i < len; i++) {
const entry = entries[i];
if (i === 0 && repoPath === undefined) {
// Try to get the repoPath from the most recent commit
repoPath = Git.normalizePath(fileName.replace(fileName.startsWith('/') ? `/${entry.fileName}` : entry.fileName!, ''));
relativeFileName = Git.normalizePath(path.relative(repoPath, fileName));
}
let commit = commits.get(entry.sha);
if (commit === undefined) {
if (entry.author !== undefined) {
let author = authors.get(entry.author);
if (author === undefined) {
author = {
name: entry.author,
lineCount: 0
};
authors.set(entry.author, author);
}
}
commit = new GitCommit('blame', repoPath!, entry.sha, relativeFileName!, entry.author, moment(`${entry.authorDate} ${entry.authorTimeZone}`, 'X +-HHmm').toDate(), entry.summary!);
if (relativeFileName !== entry.fileName) {
commit.originalFileName = entry.fileName;
}
if (entry.previousSha) {
commit.previousSha = entry.previousSha;
commit.previousFileName = entry.previousFileName;
}
commits.set(entry.sha, commit);
}
for (let j = 0, len = entry.lineCount; j < len; j++) {
const line: GitCommitLine = {
sha: entry.sha,
line: entry.line + j,
originalLine: entry.originalLine + j
};
if (commit.previousSha) {
line.previousSha = commit.previousSha;
}
commit.lines.push(line);
lines[line.line] = line;
}
}
commits.forEach(c => {
if (c.author === undefined) return;
@@ -185,23 +110,57 @@ export class GitBlameParser {
author.lineCount += c.lines.length;
});
const sortedAuthors: Map<string, GitAuthor> = new Map();
// const values =
Array.from(authors.values())
.sort((a, b) => b.lineCount - a.lineCount)
.forEach(a => sortedAuthors.set(a.name, a));
// const sortedCommits: Map<string, IGitCommit> = new Map();
// Array.from(commits.values())
// .sort((a, b) => b.date.getTime() - a.date.getTime())
// .forEach(c => sortedCommits.set(c.sha, c));
const sortedAuthors = new Map([...authors.entries()].sort((a, b) => b[1].lineCount - a[1].lineCount));
return {
repoPath: repoPath,
authors: sortedAuthors,
// commits: sortedCommits,
commits: commits,
lines: lines
} as GitBlame;
}
/**
 * Folds one parsed blame entry into the accumulating result.
 *
 * The first time a sha is seen, its author (if any) and a new commit are
 * registered. In every case one line record per line covered by the entry
 * is appended to the commit and stored in `lines` at its line number.
 *
 * @param entry Blame entry to process
 * @param repoPath Repository path passed to a newly created commit
 * @param fileName File name passed to a newly created commit; if it differs
 *   from the entry's own file name, the latter becomes the commit's
 *   original file name
 * @param commits Commits seen so far, keyed by sha (mutated)
 * @param authors Authors seen so far, keyed by name (mutated)
 * @param lines Line records, indexed by line number (mutated)
 */
private static _parseEntry(entry: BlameEntry, repoPath: string | undefined, fileName: string | undefined, commits: Map<string, GitBlameCommit>, authors: Map<string, GitAuthor>, lines: GitCommitLine[]) {
    let commit = commits.get(entry.sha);
    if (commit === undefined) {
        const authorName = entry.author;
        if (authorName !== undefined && authors.get(authorName) === undefined) {
            authors.set(authorName, { name: authorName, lineCount: 0 });
        }

        const authoredOn = moment(`${entry.authorDate} ${entry.authorTimeZone}`, 'X +-HHmm').toDate();
        commit = new GitBlameCommit(repoPath!, entry.sha, fileName!, entry.author, authoredOn, entry.summary!, []);

        if (fileName !== entry.fileName) {
            commit.originalFileName = entry.fileName;
        }

        if (entry.previousSha) {
            commit.previousSha = entry.previousSha;
            commit.previousFileName = entry.previousFileName;
        }

        commits.set(entry.sha, commit);
    }

    const count = entry.lineCount;
    for (let offset = 0; offset < count; offset++) {
        const blameLine: GitCommitLine = {
            sha: entry.sha,
            line: entry.line + offset,
            originalLine: entry.originalLine + offset
        };

        if (commit.previousSha) {
            blameLine.previousSha = commit.previousSha;
        }

        commit.lines.push(blameLine);
        lines[blameLine.line] = blameLine;
    }
}
}

View File

@@ -1,4 +1,5 @@
'use strict';
import { Iterables, Strings } from '../../system';
import { GitDiff, GitDiffChunk, GitDiffLine } from './../git';
const unifiedDiffRegex = /^@@ -([\d]+),([\d]+) [+]([\d]+),([\d]+) @@([\s\S]*?)(?=^@@)/gm;
@@ -19,44 +20,7 @@ export class GitDiffParser {
const currentStart = +match[3];
const chunk = match[5];
const lines = chunk.split('\n').slice(1);
const current: (GitDiffLine | undefined)[] = [];
const previous: (GitDiffLine | undefined)[] = [];
for (const l of lines) {
switch (l[0]) {
case '+':
current.push({
line: ` ${l.substring(1)}`,
state: 'added'
});
previous.push(undefined);
break;
case '-':
current.push(undefined);
previous.push({
line: ` ${l.substring(1)}`,
state: 'removed'
});
break;
default:
current.push({ line: l, state: 'unchanged' });
previous.push({ line: l, state: 'unchanged' });
break;
}
}
chunks.push({
chunk: debug ? chunk : undefined,
current: current,
currentStart: currentStart,
currentEnd: currentStart + +match[4],
previous: previous,
previousStart: previousStart,
previousEnd: previousStart + +match[2]
});
chunks.push(new GitDiffChunk(chunk, { start: currentStart, end: currentStart + +match[4] }, { start: previousStart, end: previousStart + +match[2] }));
} while (match != null);
if (!chunks.length) return undefined;
@@ -67,4 +31,37 @@ export class GitDiffParser {
} as GitDiff;
return diff;
}
/**
 * Splits the raw text of a diff chunk into two parallel arrays, one for the
 * current side and one for the previous side. The first line of the chunk
 * is skipped.
 *
 * - A line starting with `+` is 'added': present in `current`, `undefined` in `previous`.
 * - A line starting with `-` is 'removed': `undefined` in `current`, present in `previous`.
 * - Any other line is 'unchanged' and present on both sides.
 *
 * For added/removed lines the leading marker is replaced by a single space.
 *
 * @param chunk Raw chunk text
 * @returns The tuple `[current, previous]`
 */
static parseChunk(chunk: string): [(GitDiffLine | undefined)[], (GitDiffLine | undefined)[]] {
    const remaining = Iterables.skip(Strings.lines(chunk), 1);

    const current: (GitDiffLine | undefined)[] = [];
    const previous: (GitDiffLine | undefined)[] = [];

    for (const text of remaining) {
        const marker = text[0];

        if (marker === '+') {
            current.push({ line: ` ${text.substring(1)}`, state: 'added' });
            previous.push(undefined);
        }
        else if (marker === '-') {
            current.push(undefined);
            previous.push({ line: ` ${text.substring(1)}`, state: 'removed' });
        }
        else {
            // Each side gets its own object for an unchanged line
            current.push({ line: text, state: 'unchanged' });
            previous.push({ line: text, state: 'unchanged' });
        }
    }

    return [current, previous];
}
}

View File

@@ -1,4 +1,5 @@
'use strict';
import { Strings } from '../../system';
import { Range } from 'vscode';
import { Git, GitAuthor, GitCommitType, GitLog, GitLogCommit, GitStatusFileStatus, IGitStatusFile } from './../git';
// import { Logger } from '../../logger';
@@ -11,9 +12,6 @@ interface LogEntry {
author: string;
authorDate?: string;
// committer?: string;
// committerDate?: string;
parentShas?: string[];
fileName?: string;
@@ -29,24 +27,47 @@ const diffRegex = /diff --git a\/(.*) b\/(.*)/;
export class GitLogParser {
private static _parseEntries(data: string, type: GitCommitType, maxCount: number | undefined, reverse: boolean): LogEntry[] | undefined {
static parse(data: string, type: GitCommitType, repoPath: string | undefined, fileName: string | undefined, sha: string | undefined, maxCount: number | undefined, reverse: boolean, range: Range | undefined): GitLog | undefined {
if (!data) return undefined;
const lines = data.split('\n');
if (!lines.length) return undefined;
const authors: Map<string, GitAuthor> = new Map();
const commits: Map<string, GitLogCommit> = new Map();
const entries: LogEntry[] = [];
let relativeFileName: string;
let recentCommit: GitLogCommit | undefined = undefined;
if (repoPath !== undefined) {
repoPath = Git.normalizePath(repoPath);
}
let entry: LogEntry | undefined = undefined;
let position = -1;
while (++position < lines.length) {
// Since log --reverse doesn't properly honor a max count -- enforce it here
if (reverse && maxCount && (entries.length >= maxCount)) break;
let line: string | undefined = undefined;
let lineParts: string[];
let next: IteratorResult<string> | undefined = undefined;
let lineParts = lines[position].split(' ');
if (lineParts.length < 2) {
continue;
let i = -1;
let first = true;
let skip = false;
const lines = Strings.lines(data);
// for (line of lines) {
while (true) {
if (!skip) {
next = lines.next();
if (next.done) break;
line = next.value;
i++;
}
else {
skip = false;
}
// Since log --reverse doesn't properly honor a max count -- enforce it here
if (reverse && maxCount && (i >= maxCount)) break;
lineParts = line!.split(' ');
if (lineParts.length < 2) continue;
if (entry === undefined) {
if (!Git.shaRegex.test(lineParts[0])) continue;
@@ -69,47 +90,54 @@ export class GitLogParser {
entry.authorDate = `${lineParts[1]}T${lineParts[2]}${lineParts[3]}`;
break;
// case 'committer':
// entry.committer = lineParts.slice(1).join(' ').trim();
// break;
// case 'committer-date':
// entry.committerDate = lineParts.slice(1).join(' ').trim();
// break;
case 'parents':
entry.parentShas = lineParts.slice(1);
break;
case 'summary':
entry.summary = lineParts.slice(1).join(' ').trim();
while (++position < lines.length) {
const next = lines[position];
if (!next) break;
if (next === 'filename ?') {
position--;
while (true) {
next = lines.next();
if (next.done) break;
i++;
line = next.value;
if (!line) break;
if (line === 'filename ?') {
skip = true;
break;
}
entry.summary += `\n${lines[position]}`;
entry.summary += `\n${line}`;
}
break;
case 'filename':
if (type === 'branch') {
const nextLine = lines[position + 1];
// If the next line isn't blank, make sure it isn't starting a new commit
if (nextLine && Git.shaRegex.test(nextLine)) continue;
next = lines.next();
if (next.done) break;
position++;
i++;
line = next.value;
// If the next line isn't blank, make sure it isn't starting a new commit
if (line && Git.shaRegex.test(line)) {
skip = true;
continue;
}
let diff = false;
while (++position < lines.length) {
const line = lines[position];
while (true) {
next = lines.next();
if (next.done) break;
i++;
line = next.value;
lineParts = line.split(' ');
if (Git.shaRegex.test(lineParts[0])) {
position--;
skip = true;
break;
}
@@ -147,127 +175,90 @@ export class GitLogParser {
}
}
else {
position += 2;
const line = lines[position];
next = lines.next();
next = lines.next();
i += 2;
line = next.value;
entry.status = line[0] as GitStatusFileStatus;
entry.fileName = line.substring(1);
this._parseFileName(entry);
}
entries.push(entry);
if (first && repoPath === undefined && type === 'file' && fileName !== undefined) {
// Try to get the repoPath from the most recent commit
repoPath = Git.normalizePath(fileName.replace(fileName.startsWith('/') ? `/${entry.fileName}` : entry.fileName!, ''));
relativeFileName = Git.normalizePath(path.relative(repoPath, fileName));
}
else {
relativeFileName = entry.fileName!;
}
first = false;
recentCommit = GitLogParser._parseEntry(entry, type, repoPath, relativeFileName, commits, authors, recentCommit);
entry = undefined;
break;
default:
break;
}
if (next!.done) break;
}
return entries;
}
static parse(data: string, type: GitCommitType, repoPath: string | undefined, fileName: string | undefined, sha: string | undefined, maxCount: number | undefined, reverse: boolean, range: Range | undefined): GitLog | undefined {
const entries = this._parseEntries(data, type, maxCount, reverse);
if (!entries) return undefined;
const authors: Map<string, GitAuthor> = new Map();
const commits: Map<string, GitLogCommit> = new Map();
let relativeFileName: string;
let recentCommit: GitLogCommit | undefined = undefined;
if (repoPath !== undefined) {
repoPath = Git.normalizePath(repoPath);
}
for (let i = 0, len = entries.length; i < len; i++) {
// Since log --reverse doesn't properly honor a max count -- enforce it here
if (reverse && maxCount && (i >= maxCount)) break;
const entry = entries[i];
if (i === 0 && repoPath === undefined && type === 'file' && fileName !== undefined) {
// Try to get the repoPath from the most recent commit
repoPath = Git.normalizePath(fileName.replace(fileName.startsWith('/') ? `/${entry.fileName}` : entry.fileName!, ''));
relativeFileName = Git.normalizePath(path.relative(repoPath, fileName));
}
else {
relativeFileName = entry.fileName!;
}
let commit = commits.get(entry.sha);
if (commit === undefined) {
if (entry.author !== undefined) {
let author = authors.get(entry.author);
if (author === undefined) {
author = {
name: entry.author,
lineCount: 0
};
authors.set(entry.author, author);
}
}
commit = new GitLogCommit(type, repoPath!, entry.sha, relativeFileName, entry.author, moment(entry.authorDate).toDate(), entry.summary!, entry.status, entry.fileStatuses, undefined, entry.originalFileName);
commit.parentShas = entry.parentShas!;
if (relativeFileName !== entry.fileName) {
commit.originalFileName = entry.fileName;
}
commits.set(entry.sha, commit);
}
// else {
// Logger.log(`merge commit? ${entry.sha}`);
// }
if (recentCommit !== undefined) {
recentCommit.previousSha = commit.sha;
// If the commit sha's match (merge commit), just forward it along
commit.nextSha = commit.sha !== recentCommit.sha ? recentCommit.sha : recentCommit.nextSha;
// Only add a filename if this is a file log
if (type === 'file') {
recentCommit.previousFileName = commit.originalFileName || commit.fileName;
commit.nextFileName = recentCommit.originalFileName || recentCommit.fileName;
}
}
recentCommit = commit;
}
commits.forEach(c => {
if (c.author === undefined) return;
const author = authors.get(c.author);
if (author === undefined) return;
author.lineCount += c.lines.length;
});
const sortedAuthors: Map<string, GitAuthor> = new Map();
// const values =
Array.from(authors.values())
.sort((a, b) => b.lineCount - a.lineCount)
.forEach(a => sortedAuthors.set(a.name, a));
// const sortedCommits: Map<string, IGitCommit> = new Map();
// Array.from(commits.values())
// .sort((a, b) => b.date.getTime() - a.date.getTime())
// .forEach(c => sortedCommits.set(c.sha, c));
return {
repoPath: repoPath,
authors: sortedAuthors,
// commits: sortedCommits,
authors: authors,
commits: commits,
sha: sha,
maxCount: maxCount,
range: range,
truncated: !!(maxCount && entries.length >= maxCount)
truncated: !!(maxCount && i >= maxCount)
} as GitLog;
}
/**
 * Folds one parsed log entry into the accumulating result and links it to
 * the commit processed just before it.
 *
 * The first time a sha is seen, its author (if any) and a new commit are
 * registered; a sha seen again reuses the existing commit. When a
 * `recentCommit` is supplied, the two commits are chained together via
 * `previousSha` / `nextSha` (and, for file logs, the matching file names).
 *
 * @param entry Log entry to process
 * @param type Kind of log being parsed; passed to a newly created commit
 * @param repoPath Repository path passed to a newly created commit
 * @param relativeFileName File name passed to a newly created commit; if it
 *   differs from the entry's own file name, the latter becomes the commit's
 *   original file name
 * @param commits Commits seen so far, keyed by sha (mutated)
 * @param authors Authors seen so far, keyed by name (mutated)
 * @param recentCommit Commit returned by the previous call, if any
 * @returns The commit for this entry, to be passed as `recentCommit` next time
 */
private static _parseEntry(entry: LogEntry, type: GitCommitType, repoPath: string | undefined, relativeFileName: string, commits: Map<string, GitLogCommit>, authors: Map<string, GitAuthor>, recentCommit: GitLogCommit | undefined): GitLogCommit | undefined {
    let commit = commits.get(entry.sha);
    if (commit === undefined) {
        if (entry.author !== undefined) {
            let author = authors.get(entry.author);
            if (author === undefined) {
                author = {
                    name: entry.author,
                    lineCount: 0
                };
                authors.set(entry.author, author);
            }
        }

        // GitLogCommit's constructor takes originalFileName directly after fileStatuses
        // (it has no `lines` parameter), so no placeholder argument is passed here --
        // otherwise entry.originalFileName would end up in the previousSha slot
        commit = new GitLogCommit(type, repoPath!, entry.sha, relativeFileName, entry.author, moment(entry.authorDate).toDate(), entry.summary!, entry.status, entry.fileStatuses, entry.originalFileName);
        commit.parentShas = entry.parentShas!;

        if (relativeFileName !== entry.fileName) {
            commit.originalFileName = entry.fileName;
        }

        commits.set(entry.sha, commit);
    }
    // else {
    //     Logger.log(`merge commit? ${entry.sha}`);
    // }

    if (recentCommit !== undefined) {
        recentCommit.previousSha = commit.sha;

        // If the commit sha's match (merge commit), just forward it along
        commit.nextSha = commit.sha !== recentCommit.sha ? recentCommit.sha : recentCommit.nextSha;

        // Only add a filename if this is a file log
        if (type === 'file') {
            recentCommit.previousFileName = commit.originalFileName || commit.fileName;
            commit.nextFileName = recentCommit.originalFileName || recentCommit.fileName;
        }
    }

    return commit;
}
private static _parseFileName(entry: { fileName?: string, originalFileName?: string }) {
if (entry.fileName === undefined) return;