跳到主要内容

07、Java 19 新特性 - Vector API(第四次孵化)

新的Vector API与java.util.Vector类没有关系。事实上,它是关于数学向量计算的新 API 及其与现代SIMD(单指令-多数据)CPU的映射。

详见:[JDK Enhancement Proposal 426](https://openjdk.org/jeps/426)

Java Vector API的使用测试

Vector API

Vector API 最早在Java16中作为孵化模块引入(JEP 338),Java18中为第三次孵化(JEP 417)。通过引入该API来表达向量运算,这些运算在运行时可靠地编译为所支持的CPU架构上的最优向量指令,从而实现优于等效标量计算的性能。

首先,简单说明一下矢量运算的基本概念。可以简单理解为:通常我们使用的计算机是64位的,但当需要同时处理多个数据时,就需要更宽的寄存器。关于预测寄存器(predicate register)和矢量寄存器的相关知识可以通过以上链接了解,其中矢量寄存器的位数在128位到2048位之间不等。比如我的计算机的SVE寄存器为256位,那么一次可以处理256/32(int所占位数)=8个数据,具体见下列代码。

/*
 * Scalar reference loop: for each of the first n elements,
 * y[k] is replaced by a * x[k] + y[k]. x is only read.
 */
void daxpy (double *x, double *y, double a, int n){
    int k = 0;
    while (k < n) {
        y[k] = a * x[k] + y[k];
        k++;
    }
}

 

  • p表示预测寄存器(predicate register,也译作谓词寄存器)
  • z表示SVE寄存器

关于Vector API的部分说明:

1、 只支持x64和AArch64架构的CPU;
2、 目前只作用于C2编译器

编写测试代码

该项目代码使用JMH进行测试,需要导入以下依赖

		<!-- https://mvnrepository.com/artifact/org.openjdk.jmh/jmh-generator-annprocess -->
        <!-- Annotation processor: only needed while compiling, hence the "provided" scope. -->
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-generator-annprocess</artifactId>
            <version>1.35</version>
            <scope>provided</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.openjdk.jmh/jmh-core -->
        <!-- The benchmark's main() starts the JMH Runner, so jmh-core has to be on the
             runtime classpath: keep the default (compile) scope instead of "provided". -->
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-core</artifactId>
            <version>1.35</version>
        </dependency>

//VM Options: --add-modules jdk.incubator.vector
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark that evaluates the same element-wise formula,
 * {@code result[i] = -(nums[i] * i + nums[i] * nums[i])}, in several ways:
 * a plain loop, {@link Arrays#parallelSetAll}, hand-started threads, and the
 * incubating Vector API with 128-, 256- and 512-bit species.
 *
 * <p>All arithmetic is {@code int} arithmetic and wraps on overflow in every variant.
 *
 * <p>The benchmark method names are kept as they are because they are the identifiers
 * JMH reports in its results.
 *
 * <p>Run with the VM option {@code --add-modules jdk.incubator.vector}.
 */
@State(Scope.Thread)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(1)
public class JMH {

    /** Number of worker threads started by {@link #ParallelComputation()}. */
    private static final int WORKER_THREADS = 8;

    /** Input data; allocated (zero-filled) by {@link #setup()} with {@link #size} elements. */
    private int[] nums;

    /** Length of the input and result arrays, injected by JMH for each run. */
    @Param({
            "100",
            "1000",
            "10000",
            "100000",
            "1000000",
            "10000000",
            "100000000",
            "250000000"
    })
    int size;

    /** Allocates the input array for the current {@link #size}. */
    @Setup
    public void setup() {
        nums = new int[size];
    }

    /**
     * The scalar form of the benchmarked formula; every variant must produce this value.
     *
     * @param value the input element
     * @param index the element's position in the array
     * @return {@code -(value * index + value * value)}
     */
    private static int formula(int value, int index) {
        return -(value * index + value * value);
    }

    /**
     * Computes the formula with {@link Arrays#parallelSetAll}.
     *
     * @return the freshly allocated result array
     */
    @Benchmark
    public int[] StreamComputation() {
        int[] result = new int[size];
        Arrays.parallelSetAll(result, i -> formula(nums[i], i));
        return result;
    }

    /**
     * Computes the formula with {@value #WORKER_THREADS} newly started threads, each
     * handling every {@value #WORKER_THREADS}-th element, and waits for all of them.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    @Benchmark
    public void ParallelComputation() throws InterruptedException {
        int[] result = new int[size];
        CountDownLatch done = new CountDownLatch(WORKER_THREADS);
        for (int t = 0; t < WORKER_THREADS; t++) {
            final int start = t;
            new Thread(() -> {
                try {
                    for (int j = start; j < nums.length; j += WORKER_THREADS) {
                        result[j] = formula(nums[j], j);
                    }
                } finally {
                    // Always release the latch so a failing worker cannot hang await().
                    done.countDown();
                }
            }).start();
        }
        done.await();
    }

    // The three vector benchmarks below intentionally repeat the same loop instead of
    // sharing a helper that takes the species as a parameter: each method keeps its
    // species as a constant taken directly from IntVector.SPECIES_*.

    /**
     * Computes the formula with the Vector API using {@link IntVector#SPECIES_128}.
     *
     * @return the freshly allocated result array
     */
    @Benchmark
    public int[] Vector128Computation() {
        int[] result = new int[size];
        VectorSpecies<Integer> species = IntVector.SPECIES_128;
        int bound = species.loopBound(nums.length);
        // Lane numbers 0, 1, 2, ...; adding the loop offset yields the element indexes.
        IntVector laneNumbers = IntVector.zero(species).addIndex(1);
        int i = 0;
        for (; i < bound; i += species.length()) {
            IntVector values = IntVector.fromArray(species, nums, i);
            IntVector indexes = laneNumbers.add(i);
            values.mul(indexes)
                    .add(values.mul(values))
                    .neg()
                    .intoArray(result, i);
        }
        // Scalar tail for the elements that do not fill a whole vector.
        for (; i < nums.length; i++) {
            result[i] = formula(nums[i], i);
        }
        return result;
    }

    /**
     * Computes the formula with the Vector API using {@link IntVector#SPECIES_256}.
     *
     * @return the freshly allocated result array
     */
    @Benchmark
    public int[] Vector256Computation() {
        int[] result = new int[size];
        VectorSpecies<Integer> species = IntVector.SPECIES_256;
        int bound = species.loopBound(nums.length);
        // Lane numbers 0, 1, 2, ...; adding the loop offset yields the element indexes.
        IntVector laneNumbers = IntVector.zero(species).addIndex(1);
        int i = 0;
        for (; i < bound; i += species.length()) {
            IntVector values = IntVector.fromArray(species, nums, i);
            IntVector indexes = laneNumbers.add(i);
            values.mul(indexes)
                    .add(values.mul(values))
                    .neg()
                    .intoArray(result, i);
        }
        // Scalar tail for the elements that do not fill a whole vector.
        for (; i < nums.length; i++) {
            result[i] = formula(nums[i], i);
        }
        return result;
    }

    /**
     * Computes the formula with the Vector API using {@link IntVector#SPECIES_512}.
     *
     * <p>NOTE(review): on CPUs without 512-bit vector support this species presumably
     * cannot be mapped to hardware instructions and may run much slower than the
     * narrower ones - confirm on the target machine.
     *
     * @return the freshly allocated result array
     */
    @Benchmark
    public int[] Vector512Computation() {
        int[] result = new int[size];
        VectorSpecies<Integer> species = IntVector.SPECIES_512;
        int bound = species.loopBound(nums.length);
        // Lane numbers 0, 1, 2, ...; adding the loop offset yields the element indexes.
        IntVector laneNumbers = IntVector.zero(species).addIndex(1);
        int i = 0;
        for (; i < bound; i += species.length()) {
            IntVector values = IntVector.fromArray(species, nums, i);
            IntVector indexes = laneNumbers.add(i);
            values.mul(indexes)
                    .add(values.mul(values))
                    .neg()
                    .intoArray(result, i);
        }
        // Scalar tail for the elements that do not fill a whole vector.
        for (; i < nums.length; i++) {
            result[i] = formula(nums[i], i);
        }
        return result;
    }

    /**
     * Computes the formula with a plain sequential loop (the baseline).
     *
     * @return the freshly allocated result array
     */
    @Benchmark
    public int[] defaultComputation() {
        int[] result = new int[size];
        for (int i = 0; i < nums.length; i++) {
            result[i] = formula(nums[i], i);
        }
        return result;
    }

    /**
     * Runs every benchmark of this class, writing JMH's output to {@code jmh.log} in the
     * working directory and requesting JSON as the result format.
     *
     * @param args ignored
     * @throws RunnerException if JMH fails to run the benchmarks
     * @throws IOException if the canonical path of the log file cannot be resolved
     */
    public static void main(String[] args) throws RunnerException, IOException {
        Options opts = new OptionsBuilder()
                .include(JMH.class.getSimpleName())
                .resultFormat(ResultFormatType.JSON)
                .output(new File("jmh.log").getCanonicalPath())
                .build();
        new Runner(opts).run();
    }

}

测试结果

Benchmark                    (size)  Mode  Cnt        Score         Error  Units
JMH.ParallelComputation         100  avgt    3       29.676 ±       3.856  us/op
JMH.ParallelComputation        1000  avgt    3       32.549 ±      28.265  us/op
JMH.ParallelComputation       10000  avgt    3       45.063 ±       9.727  us/op
JMH.ParallelComputation      100000  avgt    3      219.920 ±      37.503  us/op
JMH.ParallelComputation     1000000  avgt    3     2070.604 ±     700.193  us/op
JMH.ParallelComputation    10000000  avgt    3    15660.505 ±    2796.283  us/op
JMH.ParallelComputation   100000000  avgt    3   256957.510 ± 1039982.820  us/op
JMH.ParallelComputation   250000000  avgt    3   652928.650 ±  175108.941  us/op
JMH.StreamComputation           100  avgt    3        8.579 ±       1.535  us/op
JMH.StreamComputation          1000  avgt    3        9.602 ±       0.726  us/op
JMH.StreamComputation         10000  avgt    3       20.908 ±       3.043  us/op
JMH.StreamComputation        100000  avgt    3       78.725 ±      15.190  us/op
JMH.StreamComputation       1000000  avgt    3     1080.335 ±     444.759  us/op
JMH.StreamComputation      10000000  avgt    3     7989.456 ±    3523.566  us/op
JMH.StreamComputation     100000000  avgt    3    77524.201 ±   30073.807  us/op
JMH.StreamComputation     250000000  avgt    3   192820.889 ±    7839.996  us/op
JMH.Vector128Computation        100  avgt    3        0.061 ±       0.007  us/op
JMH.Vector128Computation       1000  avgt    3        0.643 ±       0.048  us/op
JMH.Vector128Computation      10000  avgt    3        6.323 ±       0.553  us/op
JMH.Vector128Computation     100000  avgt    3       59.549 ±      34.958  us/op
JMH.Vector128Computation    1000000  avgt    3     1174.302 ±     404.046  us/op
JMH.Vector128Computation   10000000  avgt    3     8473.528 ±    1039.583  us/op
JMH.Vector128Computation  100000000  avgt    3    82410.628 ±   19711.603  us/op
JMH.Vector128Computation  250000000  avgt    3   205339.393 ±   52689.755  us/op
JMH.Vector256Computation        100  avgt    3        0.073 ±       0.012  us/op
JMH.Vector256Computation       1000  avgt    3        0.591 ±       0.034  us/op
JMH.Vector256Computation      10000  avgt    3        5.700 ±       1.486  us/op
JMH.Vector256Computation     100000  avgt    3       56.608 ±      13.098  us/op
JMH.Vector256Computation    1000000  avgt    3     1139.838 ±     273.834  us/op
JMH.Vector256Computation   10000000  avgt    3     8321.221 ±    4300.157  us/op
JMH.Vector256Computation  100000000  avgt    3    82288.278 ±   45482.083  us/op
JMH.Vector256Computation  250000000  avgt    3   204230.229 ±   89249.522  us/op
JMH.Vector512Computation        100  avgt    3        0.587 ±       0.056  us/op
JMH.Vector512Computation       1000  avgt    3        6.084 ±       1.852  us/op
JMH.Vector512Computation      10000  avgt    3       64.068 ±       2.557  us/op
JMH.Vector512Computation     100000  avgt    3      612.263 ±      93.553  us/op
JMH.Vector512Computation    1000000  avgt    3     6541.734 ±    5697.026  us/op
JMH.Vector512Computation   10000000  avgt    3    61163.729 ±   11045.924  us/op
JMH.Vector512Computation  100000000  avgt    3   767615.083 ±  241938.527  us/op
JMH.Vector512Computation  250000000  avgt    3  1632611.033 ± 1793126.085  us/op
JMH.defaultComputation          100  avgt    3        0.105 ±       0.008  us/op
JMH.defaultComputation         1000  avgt    3        0.936 ±       0.059  us/op
JMH.defaultComputation        10000  avgt    3        9.323 ±       0.311  us/op
JMH.defaultComputation       100000  avgt    3       77.149 ±      11.756  us/op
JMH.defaultComputation      1000000  avgt    3     1308.792 ±     828.180  us/op
JMH.defaultComputation     10000000  avgt    3     9623.205 ±    3032.074  us/op
JMH.defaultComputation    100000000  avgt    3    92491.573 ±   17694.014  us/op
JMH.defaultComputation    250000000  avgt    3   225411.133 ±   18094.859  us/op