解决 Scala 运行时错误 java.lang.OutOfMemoryError: Java heap space

需求:对百万、千万、4千万级的日志按设备(imei)进行去重
环境:设备内存64G,scala单机版运行shell文件
日志:
20G 48000000.log
4.0G 10000000.log
396M 1000000.log

代码如下
[scala]
import scala.io.Source
import scala.collection.mutable.ArrayBuffer
// Counts the log rows that mention both "imei" and "active", and how many
// distinct imei values those rows carry.
//
// The file is streamed line by line instead of being collected into buffers:
// keeping every row (plus several mapped copies of it) on the heap is what
// exhausts memory on multi-GB logs. Only the running total and the set of
// distinct values are retained.
import scala.collection.mutable

// Reduces a comma-separated row to its imei field(s), with double quotes and
// spaces stripped.
def imeiOf(row: String): String =
  row.split(",")
    .filter(_.contains("imei"))
    .mkString(",")
    .replaceAll("\"", "")
    .replaceAll(" ", "")

val source = Source.fromFile("/Users/shuhai/1000000.log")
try {
  val seen = mutable.HashSet.empty[String]
  var total = 0L // Long, so the counter cannot overflow on very large logs

  source.getLines()
    // imei is required, and only "active" rows are counted
    .filter(row => row.contains("imei") && row.contains("active"))
    .foreach { row =>
      total += 1
      seen += imeiOf(row)
    }

  println("total:" + total)
  println("distinct:" + seen.size)
} finally {
  source.close() // release the file handle even if reading fails
}
[/scala]

异常
➜ scala scala distinct_imei.scala
java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOfRange(Arrays.java:3664)
at java.lang.String.<init>(String.java:201)
at java.io.BufferedReader.readLine(BufferedReader.java:356)
at java.io.BufferedReader.readLine(BufferedReader.java:389)
at scala.io.BufferedSource$BufferedLineIterator.hasNext(BufferedSource.scala:72)
at Main$$anon$1.<init>(distinct_imei.scala:7)
at Main$.main(distinct_imei.scala:1)
at Main.main(distinct_imei.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at scala.reflect.internal.util.ScalaClassLoader$$anonfun$run$1.apply(ScalaClassLoader.scala:70)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.ScalaClassLoader$URLClassLoader.asContext(ScalaClassLoader.scala:101)
at scala.reflect.internal.util.ScalaClassLoader$class.run(ScalaClassLoader.scala:70)
at scala.reflect.internal.util.ScalaClassLoader$URLClassLoader.run(ScalaClassLoader.scala:101)
at scala.tools.nsc.CommonRunner$class.run(ObjectRunner.scala:22)
at scala.tools.nsc.ObjectRunner$.run(ObjectRunner.scala:39)
at scala.tools.nsc.CommonRunner$class.runAndCatch(ObjectRunner.scala:29)
at scala.tools.nsc.ObjectRunner$.runAndCatch(ObjectRunner.scala:39)
at scala.tools.nsc.ScriptRunner.scala$tools$nsc$ScriptRunner$$runCompiled(ScriptRunner.scala:170)
at scala.tools.nsc.ScriptRunner$$anonfun$runScript$1.apply(ScriptRunner.scala:187)
at scala.tools.nsc.ScriptRunner$$anonfun$runScript$1.apply(ScriptRunner.scala:187)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1$$anonfun$apply$mcZ$sp$1.apply(ScriptRunner.scala:156)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1.apply$mcZ$sp(ScriptRunner.scala:156)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1.apply(ScriptRunner.scala:124)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1.apply(ScriptRunner.scala:124)
at scala.tools.nsc.util.package$.trackingThreads(package.scala:43)
at scala.tools.nsc.util.package$.waitingForThreads(package.scala:27)
at scala.tools.nsc.ScriptRunner.withCompiledScript(ScriptRunner.scala:123)
at scala.tools.nsc.ScriptRunner.runScript(ScriptRunner.scala:187)

解决办法:
[shell]
env JAVA_OPTS="-Xms256m -Xmx2048m" scala distinct_imei.scala
[/shell]

使用scala -help可以看到有一个-J的参数,也可以实现相同的效果
[shell]
scala -J-Xms256m -J-Xmx2048m distinct_imei.scala
[/shell]

via:http://alvinalexander.com/scala/scala-repl-java.lang.outofmemoryerror-java-heap-space-error

发表评论