The LG Neon saves text messages as UTF-16LE text; however, the leading character of the text indicates that they are saved as UTF-16BE. The only character set that is documented to be supported in Apps Script is UTF-8, so I did the UTF-16 decoding manually in the script.
/**
 * Decode the raw bytes of an LG Neon text-message export into a string.
 *
 * The LG Neon saves text messages in UTF-16LE, with the header bytes for
 * UTF-16BE, so the two header bytes are skipped rather than trusted and
 * every following byte pair is read low byte first.
 *
 * @param {number[]} bytes Raw file bytes. Values may be negative; each one is
 *     masked with 0xff before use (presumably because the caller supplies
 *     signed bytes - confirm against the blob API).
 * @return {string} The decoded text. An odd trailing byte, which cannot form
 *     a complete 16-bit unit, is ignored.
 */
function decodeLgText(bytes) {
  var str = "";
  // Start at 2 to skip the two header bytes; require a full byte pair so a
  // truncated file does not yield a character built from half a code unit.
  for (var i = 2; i + 1 < bytes.length; i += 2) {
    var codeUnit = (bytes[i] & 0xff) | ((bytes[i + 1] & 0xff) << 8);
    // JavaScript strings are themselves sequences of UTF-16 code units, so
    // surrogate halves are appended as-is and pair up naturally. Combining a
    // pair into one code point and passing it to String.fromCharCode would
    // truncate it to 16 bits and corrupt characters outside the BMP.
    str += String.fromCharCode(codeUnit);
  }
  return str;
}
Two blocks of text from the export file look like this:
1) From : +1555555555(Sample Name) Sent : 2013/04/11 17:58 Contents : See you 227) To : +15555555555(Dear Friend) Sent : 2013/04/03 20:37 Contents : Back in my very warm fun fur hammock tonight, ther e was some pretty snow today, and i get to sleep i n a bit tomorrow :) Sweet dreams! <3
The contents section of the records is interesting: the raw text message is broken up with "\r\n" line breaks, and if there was a line break in the text message itself it only has a "\n". The record ends with a double "\r\n" line break. The solution to parsing this that I finally settled on has two parts: the first decodes each record into an array of lines, with the indentation and message number stripped off, and the second turns the array of lines into a useful data structure.
/**
 * Decode an LG export blob, split it into records and pass each record's
 * lines to a callback.
 *
 * Records are separated by a double "\r\n"; empty records are skipped. Within
 * a record, lines are separated by "\r\n". The leading message number
 * (e.g. "227)") on the first line is replaced by a space, and then one leading
 * space is removed from every line.
 *
 * NOTE(review): the single-space replacement string and the /^ / pattern are
 * reproduced exactly as found; runs of spaces may have been collapsed in this
 * copy of the code - verify the widths against a real export file.
 *
 * @param {Object} blob Object whose getBytes() result is accepted by
 *     decodeLgText.
 * @param {function(string[])} body Called once per non-empty record with that
 *     record's lines.
 */
function forEachLgBlock(blob, body) {
  var blocks = decodeLgText(blob.getBytes()).split("\r\n\r\n");
  for (var blockIndex = 0; blockIndex < blocks.length; blockIndex++) {
    var block = blocks[blockIndex];
    if (block === "") {
      continue;
    }
    var lines = block.split("\r\n");
    // Turn the message number into indentation so the first line can be
    // treated like every other line below.
    lines[0] = lines[0].replace(/^\d+\)/, " ");
    for (var lineIndex = 0; lineIndex < lines.length; lineIndex++) {
      lines[lineIndex] = lines[lineIndex].replace(/^ /, "");
    }
    body(lines);
  }
}

/**
 * Parse every SMS record in an LG export blob and pass each parsed message to
 * a callback.
 *
 * Header lines ("To :", "From :", "Sent :") are read up to the "Contents :"
 * line; every line after it is concatenated to form the message text. Lines
 * were split on "\r\n", so concatenating them removes the "\r\n" breaks while
 * any "\n" inside a line is kept.
 *
 * NOTE(review): each recipient/sender is pushed wrapped in its own
 * one-element array, so `to` and `from` are arrays of arrays. That shape is
 * kept for existing callers - confirm whether a flat list was intended.
 *
 * @param {Object} blob Passed through to forEachLgBlock.
 * @param {function(Object)} body Called once per record with an object having
 *     the keys type, subject, date, from, to and body.
 */
function forEachLgSms(blob, body) {
  forEachLgBlock(blob, function(lines) {
    var m = {
      type: "",
      subject: "",
      date: "",
      from: [],
      to: [],
      body: [ { type: "text", value: "" } ]
    };

    var lineIndex;
    for (lineIndex = 0; lineIndex < lines.length; lineIndex++) {
      var line = lines[lineIndex];
      var match;

      // "address(name)" form is tried before the bare-address form.
      match = line.match(/^To : (.*?)\((.*?)\)$/);
      if (match) {
        m.to.push([ { address: match[1], name: match[2] } ]);
        m.type = "outgoing";
        continue;
      }
      match = line.match(/^To : (.*)$/);
      if (match) {
        m.to.push([ { address: match[1] } ]);
        m.type = "outgoing";
        continue;
      }
      match = line.match(/^From : (.*?)\((.*?)\)$/);
      if (match) {
        m.from.push([ { address: match[1], name: match[2] } ]);
        m.type = "incoming";
        continue;
      }
      match = line.match(/^From : (.*)$/);
      if (match) {
        m.from.push([ { address: match[1] } ]);
        m.type = "incoming";
        continue;
      }
      match = line.match(/^Sent : (\d\d\d\d)\/(\d\d)\/(\d\d) (\d\d:\d\d)$/);
      if (match) {
        m.date = match[1] + "-" + match[2] + "-" + match[3] + " " + match[4];
        continue;
      }
      if (/^Contents :$/.test(line)) {
        break;
      }
    }

    // Everything after the "Contents :" line is message text. If that line
    // was never found, lineIndex equals lines.length and the text stays empty.
    var text = lines.slice(lineIndex + 1).join("");
    // String.prototype.replace returns a new string; the result has to be
    // stored for the normalisation to take effect.
    m.body[0].value = text.replace(/\n\r/g, "\n");
    body(m);
  });
}
// In the main program this parser is called with a callback, making it appear similar to a loop.
var messages = new Array(); // sms messages var folder = DriveApp.getFolderById(...); var files = folder.getFiles(); while(files.hasNext()) { var file = files.next(); forEachLgSms(file.getBlob(), function(message) { messages.push(message); }); }The two examples above would parse into this:
[ { type: "incoming", subject: "", date: "2013-04-11 17:58", from: [ { address: "+1555555555", name: "Sample Name" } ], to: [ ], body: [ { type: "text", value: "See you" } ] }, { type: "outgoing", subject: "", date: "2013-04-03 20:37", from: [ ], to: [ { address: "+15555555555", name: "Dear Friend" } ], body: [ { type: "text", value: "Back in my very warm fun fur hammock tonight, there was some pretty snow today, and i get to sleep in a bit tomorrow :)\nSweet dreams! <3" } ] } ]
What I did with the multi-media messages will be covered another time.
No comments:
Post a Comment