Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lnk metadata extraction #2308

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import iped.parsers.discord.DiscordParser;
import iped.parsers.emule.KnownMetParser;
import iped.parsers.emule.PartMetParser;
import iped.parsers.lnk.LNKShortcutParser;
import iped.parsers.mail.RFC822Parser;
import iped.parsers.mail.win10.Win10MailParser;
import iped.parsers.python.PythonParser;
Expand Down Expand Up @@ -62,6 +63,7 @@ private static Map<MediaType, Integer> installTypesToPostProcess() {

// handle wal logs
mediaTypes.put(SQLite3Parser.MEDIA_TYPE, 2);
mediaTypes.put(MediaType.parse(LNKShortcutParser.LNK_MIME_TYPE), 2);

// must be after sqlite processing to find storage_db.db
mediaTypes.put(SkypeParser.SKYPE_MIME, 3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ public static void parseExtensionBlock(byte[] b, int posSig, LNKShellItem objIte
long indMft = toInt6(b, posTmp);
int seqNum = toSmall(b, posTmp + 6);
if (indMft > 0) {
fEntry.setIndMft(indMft);
fEntry.setNtfsRef(
"MFT Entry Idx " + String.valueOf(indMft) + " - Seq.Numb. " + String.valueOf(seqNum)); //$NON-NLS-1$ //$NON-NLS-2$
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public class LNKShellItemFileEntry {
private long fileSize, modifiedDate, createDate, accessDate;
private int fileAttributeFlags, tipoShell;
private String primaryName, secondaryName, guidShellFolder, ntfsRef, unknown;
private long indMft;
private StringBuffer extensionSigs, localizedNames;

public int getTipoShell() {
Expand Down Expand Up @@ -175,4 +176,12 @@ public static String toDateStrFromFAT(DateFormat df, long ft) {

return df.format(calendar.getTime());
}

public long getIndMft() {
return indMft;
}

public void setIndMft(long indMft) {
this.indMft = indMft;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.HttpHeaders;
Expand All @@ -38,7 +40,12 @@
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import iped.data.IItemReader;
import iped.parsers.util.Messages;
import iped.parsers.util.MetadataUtil;
import iped.properties.BasicProps;
import iped.properties.ExtraProperties;
import iped.search.IItemSearcher;

/**
* Parser para arquivos de atalho (LNK) do Windows Referencias utilizadas sobre
Expand All @@ -56,18 +63,40 @@ public class LNKShortcutParser extends AbstractParser {
private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("x-lnk")); //$NON-NLS-1$
public static final String LNK_MIME_TYPE = "application/x-lnk"; //$NON-NLS-1$

public static final String LNK_METADATA_PREFIX = "lnk";
public static final String LNK_METADATA_LOCALPATHINFO = "localPathInfo";
public static final String LNK_METADATA_LOCALPATH = "localPath";
public static final String LNK_METADATA_COMMONPATH = "commonPath";
public static final String LNK_METADATA_NETWORKSHARE = "networkShare";
public static final String LNK_METADATA_VOLUMELABEL = "volumeLabel";
public static final String LNK_METADATA_FILEEXISTS = "fileExists";
public static final String LNK_METADATA_FILEMODIFIED = "modifiedAfterOpen";

static AtomicBoolean initialized = new AtomicBoolean(false);

@Override
public Set<MediaType> getSupportedTypes(ParseContext arg0) {
return SUPPORTED_TYPES;

}

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
synchronized (initialized) {
if (!initialized.get()) {
MetadataUtil.setMetadataType(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.CREATED, Date.class);
MetadataUtil.setMetadataType(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.ACCESSED, Date.class);
MetadataUtil.setMetadataType(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.MODIFIED, Date.class);
initialized.set(true);
}
}

final DateFormat df = new SimpleDateFormat(Messages.getString("LNKShortcutParser.DateFormat")); //$NON-NLS-1$
df.setTimeZone(TimeZone.getTimeZone("GMT+0")); //$NON-NLS-1$

final DateFormat dfMetadata = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");

metadata.set(HttpHeaders.CONTENT_TYPE, LNK_MIME_TYPE);
metadata.remove(TikaCoreProperties.RESOURCE_NAME_KEY);

Expand Down Expand Up @@ -95,9 +124,52 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
showHeader(lnkObj, df, xhtml);

// HasLinkInfo
if (lnkObj.hasLinkLocation())
if (lnkObj.hasLinkLocation()) {
showLinkLocation(lnkObj, df, xhtml);

// According to
// https://github.com/libyal/liblnk/blob/main/documentation/Windows%20Shortcut%20File%20(LNK)%20format.asciidoc#4-location-information
// the real local path is the concatenation of netshare, commonPath and
// localPath
String fullLocalPath = "";
LNKLinkLocation lnkLoc = lnkObj.getLinkLocation();

metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_LOCALPATHINFO, lnkLoc.getLocalPath());
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_COMMONPATH, lnkLoc.getCommonPath());
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_NETWORKSHARE, lnkLoc.getNetShare());
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_VOLUMELABEL, lnkLoc.getVolumeLabel());

if (lnkLoc.getNetShare() != null) {
fullLocalPath = fullLocalPath + lnkLoc.getNetShare();
if (!fullLocalPath.endsWith("\\")) {
fullLocalPath += "\\";
}
}
if (lnkLoc.getCommonPath() != null) {
fullLocalPath = fullLocalPath + lnkLoc.getCommonPath();
if (!fullLocalPath.equals("") && !fullLocalPath.endsWith("\\")) {
fullLocalPath += "\\";
}
}
fullLocalPath = fullLocalPath + lnkLoc.getLocalPath();
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_LOCALPATH, fullLocalPath);
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.LENGTH, Long.toString(lnkObj.getFileSize()));

try {
IItemReader refItem = makeReference(metadata, context, lnkObj, fullLocalPath, df, dfMetadata);
if (refItem == null) {
// if, and only if, the source wasn't found, register the timestamps
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.CREATED, lnkObj.getCreateDate(dfMetadata));
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.MODIFIED, lnkObj.getModifiedDate(dfMetadata));
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.ACCESSED, lnkObj.getAccessDate(dfMetadata));
}
} catch (Exception e) {
// unpredictable error when making reference
e.printStackTrace();
}

}

// HasName HasRelativePath HasWorkingDir HasArguments HasIconLocation
if (lnkObj.hasName() || lnkObj.hasRelativePath() || lnkObj.hasWorkingDir() || lnkObj.hasArguments()
|| lnkObj.hasIconLocation())
Expand All @@ -118,6 +190,119 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
xhtml.endDocument();
}

//
// Makes the reference to a found target if:
// 1) the metaAddress (mft idx in NTFS) and creation date is the same.
// - Only metaAddress can reference different file in other filesystem, as the
// number can in different FS may coincide. So creationDate is used to confirm.
// 2) the path is similar and creation date is the same
// - CreationDate is considerably reliable value, assuming no dates
// manipulation, as it is not probable 2 files created at same time. The path is
// used to confirm.
//
// If creation date is different, it still can mean that the file was moved to
// a different partition or drive (different file system).
// So here comes the question: should it still be considered the same file if
// only size and name is the same? Meanwhile, in this first version, it isn't.
private IItemReader makeReference(Metadata metadata, ParseContext context, LNKShortcut lnkObj, String fullLocalPath, DateFormat df, DateFormat dfMetadata) {
if (fullLocalPath.startsWith("file://")) {
fullLocalPath.substring(7);
}
IItemReader result = null;
LNKLinkLocation lnkLoc = lnkObj.getLinkLocation();
if (lnkLoc.getNetShare() == null) {
// tries to link to local file only if net info not defined
IItemSearcher searcher = context.get(IItemSearcher.class);
if (searcher != null) {

List<IItemReader> items = null;
boolean mftIdxFound = false;

if (lnkObj.hasTargetIDList() && lnkObj.getShellTargetIDList().size() > 0) {
// search based on MFT entry index, if existent
LNKShellItem lastTarget = lnkObj.getShellTargetIDList().get(lnkObj.getShellTargetIDList().size() - 1);
if (lastTarget.hasFileEntry()) {
LNKShellItemFileEntry fEntry = lastTarget.getFileEntry();
items = searcher.search(BasicProps.META_ADDRESS + ":" + fEntry.getIndMft());
if (items.size() <= 0) {
items = null;
} else {
mftIdxFound = true;
}
}
}

if (items != null) {
result = makeReference(metadata, context, lnkObj, items, df, dfMetadata);
}

if (result == null) {// if no reference could be done based on metaAddress
// searches based on path
String relLocalPath = fullLocalPath.replace("\\", "\\\\");
int i = relLocalPath.indexOf(":");// search for drive letter separator
if (i > 0) {
relLocalPath = relLocalPath.substring(i + 1);// gets path starting from drive path separator
}
items = searcher.search(BasicProps.PATH + ":\"" + relLocalPath + "\"");

if (items != null && items.size() > 0) {
result = makeReference(metadata, context, lnkObj, items, df, dfMetadata);
}
}
}
}
return result;
}

private IItemReader makeReference(Metadata metadata, ParseContext context, LNKShortcut lnkObj, List<IItemReader> items, DateFormat df, DateFormat dfMetadata) {
for (IItemReader iReader : items) {
// creation date will confirm that the item is from the correct volume/path
Date created = iReader.getCreationDate();
if (created != null) {
if (df.format(created).equals(lnkObj.getCreateDate(df))) {
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_FILEEXISTS, "true");
metadata.add(ExtraProperties.LINKED_ITEMS, BasicProps.ID + ":" + iReader.getId());

// if item with same path exists, mark it.
boolean sizeMatches = false;
if (iReader.getLength() == lnkObj.getFileSize()) {
sizeMatches = true;
}

Date modifiedDate = iReader.getModDate();
if (modifiedDate != null) {
if (!df.format(modifiedDate).equals(lnkObj.getModifiedDate(df))) {
// if item moddate is different than the registered in LNK file, informs that it
// was modified after seen by this link
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_FILEMODIFIED, "true");

// if LNK registered source last modified date is different from found source
// last modified date, adds the metadata
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.MODIFIED, lnkObj.getModifiedDate(dfMetadata));
}
}

Date acessedDate = iReader.getAccessDate();
if (acessedDate != null) {
// if LNK registered source access date is different from found source access
// date, adds the metadata
if (!df.format(acessedDate).equals(lnkObj.getModifiedDate(df))) {
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + BasicProps.ACCESSED, lnkObj.getAccessDate(dfMetadata));
}
}
if (!sizeMatches) {
// if item size is different than the registered in LNK file, informs that it
// was modified after seen by this link
metadata.add(LNK_METADATA_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + LNK_METADATA_FILEMODIFIED, "true");
}
return iReader;
}
}
}
return null;
}


private void showHeader(LNKShortcut lnkObj, DateFormat df, XHTMLContentHandler xhtml) throws Exception {
xhtml.startElement("table", "class", "t"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
addRowHeader(xhtml, Messages.getString("LNKShortcutParser.FileHeader")); //$NON-NLS-1$
Expand Down
Loading